Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

checkpoint-latest/model.safetensors +1 -1
checkpoint-latest/optimizer.pt +1 -1
checkpoint-latest/rng_state.pth +1 -1
checkpoint-latest/scaler.pt +1 -1
checkpoint-latest/scheduler.pt +1 -1
checkpoint-latest/trainer_state.json +75 -3

checkpoint-latest/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f75126c2a34822f1bd2758385a8fd1ca1669f4dff9de1317e4f5b25887d0a0d
 size 62293080

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec5e19390f915af7d82e4e4d4ab59cbb91dfd4e2451e13a4fd5b2fcb2756dcd2
 size 62293080

checkpoint-latest/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b94ba484a1c3a45fc8e9c69eeda0cff023c2452e3377a4ee8a9ae36f58df97f
 size 124642443

 version https://git-lfs.github.com/spec/v1
+oid sha256:f08646ac1adb0510b1dd9481036497e5ccbd6dc461543f2ea2ce32d8d76f3f42
 size 124642443

checkpoint-latest/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b7133fcea647449b2caaa65223ebbc0c180189bbd59dbe842634047836d81e2
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b0ec4d2188868fd24263efa2856258953fca7ad21aed2b50e22b491f1d8939f
 size 14645

checkpoint-latest/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e26664050c6d90fb565d76db26661576aa404ce53418da0b68344264e2ee7e47
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:9779a733270277f15e820d84d3dfdfb3a66fd96b857f3f0109ac7f2b54244d67
 size 1383

checkpoint-latest/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac8a8c1f0ca136b116df977b16704d013fad49cab357d7231675ec3945e85ad2
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a902ca58dea28be10847ac21293e6d27c44fc74bd49d763b881d90cbd1e58f0a
 size 1465

checkpoint-latest/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5278901988386415,
   "eval_steps": 500,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -224,6 +224,78 @@
       "eval_samples_per_second": 166.428,
       "eval_steps_per_second": 6.404,
       "step": 3000
     }
   ],
   "logging_steps": 125,
@@ -243,7 +315,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1760138625024000.0,
   "train_batch_size": 26,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7038535984515221,
   "eval_steps": 500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 166.428,
       "eval_steps_per_second": 6.404,
       "step": 3000
+    },
+    {
+      "epoch": 0.5498856237902516,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0002450290339609361,
+      "loss": 4.0045,
+      "step": 3125
+    },
+    {
+      "epoch": 0.5718810487418617,
+      "grad_norm": NaN,
+      "learning_rate": 0.00024282949146577507,
+      "loss": 4.0077,
+      "step": 3250
+    },
+    {
+      "epoch": 0.5938764736934717,
+      "grad_norm": NaN,
+      "learning_rate": 0.0002406299489706141,
+      "loss": 3.5634,
+      "step": 3375
+    },
+    {
+      "epoch": 0.6158718986450818,
+      "grad_norm": NaN,
+      "learning_rate": 0.0002384304064754531,
+      "loss": 0.0,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6158718986450818,
+      "eval_loss": NaN,
+      "eval_runtime": 58.2581,
+      "eval_samples_per_second": 169.075,
+      "eval_steps_per_second": 6.506,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6378673235966918,
+      "grad_norm": NaN,
+      "learning_rate": 0.00023623086398029207,
+      "loss": 0.0,
+      "step": 3625
+    },
+    {
+      "epoch": 0.659862748548302,
+      "grad_norm": NaN,
+      "learning_rate": 0.00023403132148513107,
+      "loss": 0.0,
+      "step": 3750
+    },
+    {
+      "epoch": 0.6818581734999121,
+      "grad_norm": NaN,
+      "learning_rate": 0.00023183177898997007,
+      "loss": 0.0,
+      "step": 3875
+    },
+    {
+      "epoch": 0.7038535984515221,
+      "grad_norm": NaN,
+      "learning_rate": 0.00022963223649480905,
+      "loss": 0.0,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7038535984515221,
+      "eval_loss": NaN,
+      "eval_runtime": 58.3487,
+      "eval_samples_per_second": 168.813,
+      "eval_steps_per_second": 6.495,
+      "step": 4000
     }
   ],
   "logging_steps": 125,
       "attributes": {}
     }
   },
+  "total_flos": 2346851500032000.0,
   "train_batch_size": 26,
   "trial_name": null,
   "trial_params": null