Training in progress, step 100, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model-00001-of-00002.safetensors +1 -1
last-checkpoint/model-00002-of-00002.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +47 -4

last-checkpoint/model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a9170cbed119344f7357c62df8457537fe0a7644849cb4823e083c8e4f578e6
 size 4995335576

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9295ebc05734a85b5f43993ee4c64b21a5c9bb19c7285c3147224043523c975
 size 4995335576

last-checkpoint/model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a6deda6389e85cf2881897ff87348bebcf52fb243305d421f776a79bc34832d
 size 1857639032

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffe78de4e4b316205c289b24c407040958c5034fa1409b39c7d297f171b1f7ca
 size 1857639032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cfdf98d6264dd523f4ce392882686c61263bd8c3bcefcb2739940446834d46d
 size 13706103974

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a00bef7af234902ab8346a2c39aaeeccbe3fbcabcb95b900800eda4225fe706
 size 13706103974

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f37c40ce327861a7ca13b719d3aa37510a143368b6e74358bdb14becb3899e1e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03bfcb5cd3899a02f7a89e2033f35f63eb1a6773ac4ce6695121020cac9264f0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:01bc98a8690d286a0c5c6c74f6f325ac33ceb1fd4ad50ba634b85c5c1612f447
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 1,
   "best_metric": 1.4945952892303467,
   "best_model_checkpoint": null,
-  "epoch": 0.003123048094940662,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -59,6 +59,49 @@
       "eval_samples_per_second": 19.391,
       "eval_steps_per_second": 19.391,
       "step": 50
     }
   ],
   "logging_steps": 10,
@@ -73,7 +116,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -87,7 +130,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2042310819840000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 1,
   "best_metric": 1.4945952892303467,
   "best_model_checkpoint": null,
+  "epoch": 0.006246096189881324,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.391,
       "eval_steps_per_second": 19.391,
       "step": 50
+    },
+    {
+      "epoch": 0.0037476577139287947,
+      "grad_norm": 46.25,
+      "learning_rate": 0.049698382650241506,
+      "loss": 227.9017,
+      "step": 60
+    },
+    {
+      "epoch": 0.004372267332916927,
+      "grad_norm": 129.0,
+      "learning_rate": 0.04956310770317444,
+      "loss": 91.6515,
+      "step": 70
+    },
+    {
+      "epoch": 0.004996876951905059,
+      "grad_norm": 25.375,
+      "learning_rate": 0.04940309978877575,
+      "loss": 43.7426,
+      "step": 80
+    },
+    {
+      "epoch": 0.005621486570893191,
+      "grad_norm": 26.125,
+      "learning_rate": 0.04921852002145197,
+      "loss": 35.9591,
+      "step": 90
+    },
+    {
+      "epoch": 0.006246096189881324,
+      "grad_norm": 71.5,
+      "learning_rate": 0.04900955425738262,
+      "loss": 25.3901,
+      "step": 100
+    },
+    {
+      "epoch": 0.006246096189881324,
+      "eval_loss": 21.986961364746094,
+      "eval_runtime": 53.4891,
+      "eval_samples_per_second": 15.76,
+      "eval_steps_per_second": 15.76,
+      "step": 100
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4084621639680000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null