Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state_0.pth +1 -1
rng_state_1.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +85 -3

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af95b1c5a220d6a55dfa035d107d9e91bfabf1b2db706471abe4481fe57a2c18
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3ce68718b8a35a3a048606c7227ce62896cef62da3846077b079cf4e65f928f
 size 645975704

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a94642b6cbfd74bb89c873dfb0f163ca25cacf3de48ebe5602d93d089f703814
 size 1292087499

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9181fa39faa989eb419ddd21e690cc24b9c81c4b9dc4a6788f9876d477a054a
 size 1292087499

rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1076c60313859db66ff0de37f0791c974b8c08d8b63ddb5f7cbe475f61adaae8
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:a71d6f2e3805dd0fbc1c29e9123bdf79aa32a2021db986be4d7381af5577b720
 size 14917

rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b18641396ff6afcf33050e5d126e243d87afe83252f83da2f3ac56c52a96e88
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f871e3d061f929ed6b8e9123a74713f9fefb89526ab0c1dde6ff4d5effb9bb4
 size 14917

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5aa1cb0b33cdae18d9d01e7f84345dc8aa89b0d8db4af3c1d3869e0251d9ef27
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4f408f4bf2063c74f79db312cf2d3df67bb058ff318f993c9aede44c2e050c1
 size 1383

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b679dc767877b4670dc9f5034576dafc9e3e774d98ec6626b0317647bc6c0dbb
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:b77c6fe0af7c0d4f86eaffcb54d6c452a11391b58cf3a89cac254d6f14013233
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
   "eval_steps": 500,
-  "global_step": 1040,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -427,6 +427,88 @@
       "eval_samples_per_second": 25.616,
       "eval_steps_per_second": 6.456,
       "step": 1040
     }
   ],
   "logging_steps": 25,
@@ -446,7 +528,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.12159461381505e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.0,
   "eval_steps": 500,
+  "global_step": 1248,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.616,
       "eval_steps_per_second": 6.456,
       "step": 1040
+    },
+    {
+      "epoch": 5.048250904704463,
+      "grad_norm": 0.02901943400502205,
+      "learning_rate": 9.77906761542642e-05,
+      "loss": 0.0196,
+      "mean_token_accuracy": 0.9936874963573574,
+      "num_tokens": 11906951.0,
+      "step": 1050
+    },
+    {
+      "epoch": 5.168878166465621,
+      "grad_norm": 0.03153559938073158,
+      "learning_rate": 8.685913460440795e-05,
+      "loss": 0.0184,
+      "mean_token_accuracy": 0.994182522892952,
+      "num_tokens": 12191281.0,
+      "step": 1075
+    },
+    {
+      "epoch": 5.2895054282267795,
+      "grad_norm": 0.025045236572623253,
+      "learning_rate": 7.64492214390088e-05,
+      "loss": 0.0186,
+      "mean_token_accuracy": 0.9937938040494919,
+      "num_tokens": 12477131.0,
+      "step": 1100
+    },
+    {
+      "epoch": 5.410132689987937,
+      "grad_norm": 0.027324741706252098,
+      "learning_rate": 6.659313588910162e-05,
+      "loss": 0.0177,
+      "mean_token_accuracy": 0.994279220700264,
+      "num_tokens": 12761273.0,
+      "step": 1125
+    },
+    {
+      "epoch": 5.530759951749095,
+      "grad_norm": 0.026910969987511635,
+      "learning_rate": 5.732136412404048e-05,
+      "loss": 0.0186,
+      "mean_token_accuracy": 0.9938160961866379,
+      "num_tokens": 13045337.0,
+      "step": 1150
+    },
+    {
+      "epoch": 5.651387213510254,
+      "grad_norm": 0.020254185423254967,
+      "learning_rate": 4.8662584953765875e-05,
+      "loss": 0.0173,
+      "mean_token_accuracy": 0.994459273815155,
+      "num_tokens": 13330131.0,
+      "step": 1175
+    },
+    {
+      "epoch": 5.772014475271411,
+      "grad_norm": 0.024621177464723587,
+      "learning_rate": 4.064358112147213e-05,
+      "loss": 0.0172,
+      "mean_token_accuracy": 0.994288050532341,
+      "num_tokens": 13612873.0,
+      "step": 1200
+    },
+    {
+      "epoch": 5.892641737032569,
+      "grad_norm": 0.026756085455417633,
+      "learning_rate": 3.328915646105903e-05,
+      "loss": 0.0175,
+      "mean_token_accuracy": 0.9942980527877807,
+      "num_tokens": 13895752.0,
+      "step": 1225
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.04168795421719551,
+      "eval_mean_token_accuracy": 0.9895306697455786,
+      "eval_num_tokens": 14130881.0,
+      "eval_runtime": 14.4268,
+      "eval_samples_per_second": 25.577,
+      "eval_steps_per_second": 6.446,
+      "step": 1248
     }
   ],
   "logging_steps": 25,
       "attributes": {}
     }
   },
+  "total_flos": 6.145950713412321e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null