Training in progress, step 2100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bea1b48825d279d5ca7532312e7e81957e535191d5f4e4e23c6756d53ffb5dc5
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:f44b60552954a3eb7b67cd9bb47c074f5bb2012c7a637b3c8cc3d9edcf8944b3
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc649b7fa91947a37cd4744fb1a38adf59d9a1c0676e9bc59a750dc67ad53fa6
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6cc5b7d6a2fac731ee6f768114be3787bc9d22aa5f028f118b2c784c9ffca1f
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53febc76262518d0519b05d74ab6f65dd5851f3bbee84bc1c2b8f6935b1f50de
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d68747f6eb2bb192bc48db140d8e66025b016a51ccd2dd4f8273e6973eed04b3
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f860d9af78aedd57dafcb10a7d7e5d5d6fe980f28aaf3455f7dda455f8cb9c1
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1408,6 +1408,76 @@
       "learning_rate": 1.0128617363344052e-05,
       "loss": 0.2778,
       "step": 2000
     }
   ],
   "logging_steps": 10,
@@ -1427,7 +1497,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.604261231669248e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.84,
   "eval_steps": 500,
+  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.0128617363344052e-05,
       "loss": 0.2778,
       "step": 2000
+    },
+    {
+      "epoch": 0.804,
+      "grad_norm": 6.987481594085693,
+      "learning_rate": 9.927652733118971e-06,
+      "loss": 0.2778,
+      "step": 2010
+    },
+    {
+      "epoch": 0.808,
+      "grad_norm": 11.547746658325195,
+      "learning_rate": 9.726688102893891e-06,
+      "loss": 0.3181,
+      "step": 2020
+    },
+    {
+      "epoch": 0.812,
+      "grad_norm": 7.187608242034912,
+      "learning_rate": 9.525723472668812e-06,
+      "loss": 0.3211,
+      "step": 2030
+    },
+    {
+      "epoch": 0.816,
+      "grad_norm": 14.975872039794922,
+      "learning_rate": 9.32475884244373e-06,
+      "loss": 0.2335,
+      "step": 2040
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 5.20744514465332,
+      "learning_rate": 9.123794212218651e-06,
+      "loss": 0.3012,
+      "step": 2050
+    },
+    {
+      "epoch": 0.824,
+      "grad_norm": 9.876429557800293,
+      "learning_rate": 8.92282958199357e-06,
+      "loss": 0.3095,
+      "step": 2060
+    },
+    {
+      "epoch": 0.828,
+      "grad_norm": 7.847969055175781,
+      "learning_rate": 8.72186495176849e-06,
+      "loss": 0.3336,
+      "step": 2070
+    },
+    {
+      "epoch": 0.832,
+      "grad_norm": 5.847342014312744,
+      "learning_rate": 8.520900321543409e-06,
+      "loss": 0.3471,
+      "step": 2080
+    },
+    {
+      "epoch": 0.836,
+      "grad_norm": 12.866349220275879,
+      "learning_rate": 8.319935691318329e-06,
+      "loss": 0.5096,
+      "step": 2090
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 5.676148891448975,
+      "learning_rate": 8.118971061093248e-06,
+      "loss": 0.3028,
+      "step": 2100
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.789296334928282e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null