Training in progress, step 440, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model-00001-of-00004.safetensors +1 -1
last-checkpoint/model-00002-of-00004.safetensors +1 -1
last-checkpoint/model-00003-of-00004.safetensors +1 -1
last-checkpoint/model-00004-of-00004.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +47 -3

last-checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6ff65fdaabdbf317ebeba7a949d5d0a4c4d92e83844319c0d7fe8a36188ae52
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2fb0b3c74d1ce9cad255087000265c31c25c76f4299532d8b88d85a4441a3e4
 size 4976698672

last-checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75a8866762d9e391b12fc72c391a88203f43a851b8cb313ef7dad4f518fa97c7
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5038c9de904dbea4e32315c794fa2074ecc92c50c8eb6195def7911e593e18d
 size 4999802720

last-checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bdc3634cc2773d067b17c976dcd11b7c7df5940a7571bbb00f5d7df4c638644
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:141f38f7b29ec3541b6db31c4ce70506e88378235faeb8284e0042bdb72ee8f2
 size 4915916176

last-checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f015982ff6bce7542e828f15d4eeef9584a889686885e52763409fdc683cfcdd
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b86685c4b1f23b972380ed4f5d5cb1c2ea00d7ffa8cf4923514704537825099
 size 1168138808

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65e0c53a4e65a879c4f5b6bbea3d323377988982d8d0b1157cdadf6ac17d04c3
 size 16311821444

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa98c4565c961954703f6a0050170f46f8bf17adb206bc140547154f733c23d1
 size 16311821444

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90b33c83dc0410b94f46921f5ce291ef34ed50ec198c7840b5fe049891543c68
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb81452b7889ec50c58e9c6cfe42bd41415f0b599131b3afe2d3e903774fb256
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a673aaf85c0fe6b6c29cb8f3e7dbd829eef637110e4ad9a775f3fcf001c92591
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc08998cd3131e9eceb26f149e13426c0c67832091c26bd1c251b4a9292ec95c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10346611484738748,
   "eval_steps": 20,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -455,6 +455,50 @@
       "eval_samples_per_second": 8.576,
       "eval_steps_per_second": 4.288,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -474,7 +518,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.37764114956288e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11381272633212623,
   "eval_steps": 20,
+  "global_step": 440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.576,
       "eval_steps_per_second": 4.288,
       "step": 400
+    },
+    {
+      "epoch": 0.10605276771857217,
+      "grad_norm": 17.25,
+      "learning_rate": 0.00019297764858882514,
+      "loss": 3.3748,
+      "step": 410
+    },
+    {
+      "epoch": 0.10863942058975685,
+      "grad_norm": 10.0,
+      "learning_rate": 0.00019248258232139388,
+      "loss": 3.2153,
+      "step": 420
+    },
+    {
+      "epoch": 0.10863942058975685,
+      "eval_loss": 2.999889850616455,
+      "eval_runtime": 110.6746,
+      "eval_samples_per_second": 7.771,
+      "eval_steps_per_second": 3.885,
+      "step": 420
+    },
+    {
+      "epoch": 0.11122607346094154,
+      "grad_norm": 20.5,
+      "learning_rate": 0.00019197133427991436,
+      "loss": 3.0603,
+      "step": 430
+    },
+    {
+      "epoch": 0.11381272633212623,
+      "grad_norm": 71.5,
+      "learning_rate": 0.00019144399391799043,
+      "loss": 2.795,
+      "step": 440
+    },
+    {
+      "epoch": 0.11381272633212623,
+      "eval_loss": 3.199730396270752,
+      "eval_runtime": 97.5912,
+      "eval_samples_per_second": 8.812,
+      "eval_steps_per_second": 4.406,
+      "step": 440
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.115405264519168e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null