Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +3 -203

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adbf32216f68817ac7b8e81d84ec05581ee1d4aec78db3102b8b8bfda9c3203a
 size 161515608

 version https://git-lfs.github.com/spec/v1
+oid sha256:1238e11a7bd83abb0038d7f1ee20d6d90f9c39b3e70e08a93260b11901cee5c5
 size 161515608

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b74bcd870dc58a45d5857957da63a7b34ce5562b9a8ed24f282d74c1daa703e
 size 323181259

 version https://git-lfs.github.com/spec/v1
+oid sha256:0230fe0e059307ec2503aabf08f5e2bde7daf4b09ee960fcb69b3dfba125cec1
 size 323181259

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5b517d1b8e2b0f837c8b00170b154961d4d989feba4326ac25583df7a55c57a
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:7063580a565cb4ab0c1d36b25d817a35a16d1f21f4a993a9f25cdba6efadcb9d
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e3ed70b691deef80930296c31c1f2faec5c46190c3c196aae31c4481cc14ad8
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cc1343ebe01037162a057bcccefc9f328f82750a217d5974a02a6ad6a4bc5ce
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.175182481751825,
   "eval_steps": 500,
-  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -558,206 +558,6 @@
       "mean_token_accuracy": 0.765391580760479,
       "num_tokens": 204197.0,
       "step": 55
-    },
-    {
-      "entropy": 1.411361187696457,
-      "epoch": 3.116788321167883,
-      "grad_norm": 4.21875,
-      "learning_rate": 1.3432073050985201e-05,
-      "loss": 0.7665,
-      "mean_token_accuracy": 0.7553833983838558,
-      "num_tokens": 207610.0,
-      "step": 56
-    },
-    {
-      "entropy": 1.3223325684666634,
-      "epoch": 3.1751824817518246,
-      "grad_norm": 3.71875,
-      "learning_rate": 1.2808754571563827e-05,
-      "loss": 0.804,
-      "mean_token_accuracy": 0.7530029378831387,
-      "num_tokens": 211730.0,
-      "step": 57
-    },
-    {
-      "entropy": 1.2704328149557114,
-      "epoch": 3.2335766423357666,
-      "grad_norm": 3.46875,
-      "learning_rate": 1.2189280281214128e-05,
-      "loss": 0.7542,
-      "mean_token_accuracy": 0.775670263916254,
-      "num_tokens": 216415.0,
-      "step": 58
-    },
-    {
-      "entropy": 1.3555709198117256,
-      "epoch": 3.291970802919708,
-      "grad_norm": 3.9375,
-      "learning_rate": 1.1574736948340163e-05,
-      "loss": 0.7992,
-      "mean_token_accuracy": 0.7488890923559666,
-      "num_tokens": 219953.0,
-      "step": 59
-    },
-    {
-      "entropy": 1.2632866501808167,
-      "epoch": 3.3503649635036497,
-      "grad_norm": 3.578125,
-      "learning_rate": 1.0966202690771015e-05,
-      "loss": 0.75,
-      "mean_token_accuracy": 0.7654453739523888,
-      "num_tokens": 224335.0,
-      "step": 60
-    },
-    {
-      "entropy": 1.2773741334676743,
-      "epoch": 3.408759124087591,
-      "grad_norm": 4.125,
-      "learning_rate": 1.036474508437579e-05,
-      "loss": 0.8394,
-      "mean_token_accuracy": 0.7538279145956039,
-      "num_tokens": 228300.0,
-      "step": 61
-    },
-    {
-      "entropy": 1.2203935906291008,
-      "epoch": 3.4671532846715327,
-      "grad_norm": 4.3125,
-      "learning_rate": 9.771419290172776e-06,
-      "loss": 0.7866,
-      "mean_token_accuracy": 0.7759390734136105,
-      "num_tokens": 231820.0,
-      "step": 62
-    },
-    {
-      "entropy": 1.2281916178762913,
-      "epoch": 3.5255474452554747,
-      "grad_norm": 4.5,
-      "learning_rate": 9.187266203218457e-06,
-      "loss": 0.7456,
-      "mean_token_accuracy": 0.7896540127694607,
-      "num_tokens": 235502.0,
-      "step": 63
-    },
-    {
-      "entropy": 1.1479723155498505,
-      "epoch": 3.5839416058394162,
-      "grad_norm": 3.84375,
-      "learning_rate": 8.61331062652391e-06,
-      "loss": 0.6779,
-      "mean_token_accuracy": 0.7954859808087349,
-      "num_tokens": 239847.0,
-      "step": 64
-    },
-    {
-      "entropy": 1.227071214467287,
-      "epoch": 3.6423357664233578,
-      "grad_norm": 4.78125,
-      "learning_rate": 8.050559473202078e-06,
-      "loss": 0.7642,
-      "mean_token_accuracy": 0.7581925354897976,
-      "num_tokens": 243356.0,
-      "step": 65
-    },
-    {
-      "entropy": 1.131257489323616,
-      "epoch": 3.7007299270072993,
-      "grad_norm": 3.5625,
-      "learning_rate": 7.500000000000004e-06,
-      "loss": 0.7819,
-      "mean_token_accuracy": 0.7654204778373241,
-      "num_tokens": 249682.0,
-      "step": 66
-    },
-    {
-      "entropy": 1.16723557934165,
-      "epoch": 3.759124087591241,
-      "grad_norm": 4.5,
-      "learning_rate": 6.962598075315047e-06,
-      "loss": 0.6689,
-      "mean_token_accuracy": 0.783266007900238,
-      "num_tokens": 253238.0,
-      "step": 67
-    },
-    {
-      "entropy": 1.2070689871907234,
-      "epoch": 3.8175182481751824,
-      "grad_norm": 5.1875,
-      "learning_rate": 6.439296484733526e-06,
-      "loss": 0.7421,
-      "mean_token_accuracy": 0.7796755991876125,
-      "num_tokens": 256423.0,
-      "step": 68
-    },
-    {
-      "entropy": 1.1488405130803585,
-      "epoch": 3.875912408759124,
-      "grad_norm": 5.34375,
-      "learning_rate": 5.931013277064377e-06,
-      "loss": 0.7267,
-      "mean_token_accuracy": 0.7691169492900372,
-      "num_tokens": 259934.0,
-      "step": 69
-    },
-    {
-      "entropy": 1.130510926246643,
-      "epoch": 3.9343065693430654,
-      "grad_norm": 5.25,
-      "learning_rate": 5.438640153769654e-06,
-      "loss": 0.7209,
-      "mean_token_accuracy": 0.7871466726064682,
-      "num_tokens": 263187.0,
-      "step": 70
-    },
-    {
-      "entropy": 1.1477855034172535,
-      "epoch": 3.9927007299270074,
-      "grad_norm": 4.75,
-      "learning_rate": 4.963040904617131e-06,
-      "loss": 0.7762,
-      "mean_token_accuracy": 0.7656804099678993,
-      "num_tokens": 267097.0,
-      "step": 71
-    },
-    {
-      "entropy": 1.09878408908844,
-      "epoch": 4.0,
-      "grad_norm": 12.875,
-      "learning_rate": 4.505049892299517e-06,
-      "loss": 0.7072,
-      "mean_token_accuracy": 0.7617444694042206,
-      "num_tokens": 267588.0,
-      "step": 72
-    },
-    {
-      "entropy": 1.0318926461040974,
-      "epoch": 4.0583941605839415,
-      "grad_norm": 4.28125,
-      "learning_rate": 4.06547058867883e-06,
-      "loss": 0.5992,
-      "mean_token_accuracy": 0.8166146464645863,
-      "num_tokens": 271589.0,
-      "step": 73
-    },
-    {
-      "entropy": 1.1504660807549953,
-      "epoch": 4.116788321167883,
-      "grad_norm": 4.78125,
-      "learning_rate": 3.645074165223656e-06,
-      "loss": 0.606,
-      "mean_token_accuracy": 0.8282722532749176,
-      "num_tokens": 274468.0,
-      "step": 74
-    },
-    {
-      "entropy": 1.1046061255037785,
-      "epoch": 4.175182481751825,
-      "grad_norm": 3.671875,
-      "learning_rate": 3.244598140112404e-06,
-      "loss": 0.6325,
-      "mean_token_accuracy": 0.8047133162617683,
-      "num_tokens": 278830.0,
-      "step": 75
     }
   ],
   "logging_steps": 1,
@@ -777,7 +577,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7471994807169024.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.0583941605839415,
   "eval_steps": 500,
+  "global_step": 55,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.765391580760479,
       "num_tokens": 204197.0,
       "step": 55
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5469020090400768.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null