Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +115 -3

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ba8e8c80803d2a11a70df9d4dacb77457eed010f3898a3e490266b2b1d0f304
 size 119801528

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7f65be280500f9efd4e7fcf8f26e2b825a078f269733bf309a52f06a7ab655e
 size 119801528

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e175f62ebf7fe247327e70639ff1cd0a6113e616d2b57e42eb7b074ff5565e78
 size 239893323

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d20cbc270f68359e1c4dcbcf2bbd43789852552e43bdf73ce0e1f01ff8e03f5
 size 239893323

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d3cd19daaf428db09a275ce94ac3f4a37a8f517f6ec3f0aebca352aaf3b9190
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:4bb492c21af1e402f8bd2306bdfbc3ea5270d01dcc213399de0f30d6c4d5b284
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3d5af38a40ff70252c3be98535c6d89a09a82aa37dfb5a2e298d6edde343dca
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:aba534ce755aa4b07bf85a12cd1cb3f87d8540ac06b99529635659d02895264e
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.121212121212121,
   "eval_steps": 500,
-  "global_step": 28,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -204,6 +204,118 @@
       "learning_rate": 0.00032725424859373687,
       "loss": 0.5318,
       "step": 28
     }
   ],
   "logging_steps": 1,
@@ -223,7 +335,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1418923874893824.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.96969696969697,
   "eval_steps": 500,
+  "global_step": 44,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00032725424859373687,
       "loss": 0.5318,
       "step": 28
+    },
+    {
+      "epoch": 3.242424242424242,
+      "grad_norm": 0.8166446089744568,
+      "learning_rate": 0.0003019779227044398,
+      "loss": 0.9006,
+      "step": 29
+    },
+    {
+      "epoch": 3.3636363636363638,
+      "grad_norm": 1.2094213962554932,
+      "learning_rate": 0.0002761321158169134,
+      "loss": 0.6391,
+      "step": 30
+    },
+    {
+      "epoch": 3.484848484848485,
+      "grad_norm": 0.9135984778404236,
+      "learning_rate": 0.00025,
+      "loss": 0.6285,
+      "step": 31
+    },
+    {
+      "epoch": 3.606060606060606,
+      "grad_norm": 0.9478852152824402,
+      "learning_rate": 0.00022386788418308668,
+      "loss": 0.6269,
+      "step": 32
+    },
+    {
+      "epoch": 3.7272727272727275,
+      "grad_norm": 0.5533197522163391,
+      "learning_rate": 0.0001980220772955602,
+      "loss": 0.5646,
+      "step": 33
+    },
+    {
+      "epoch": 3.8484848484848486,
+      "grad_norm": 1.0226417779922485,
+      "learning_rate": 0.00017274575140626317,
+      "loss": 0.5521,
+      "step": 34
+    },
+    {
+      "epoch": 3.9696969696969697,
+      "grad_norm": 1.2138278484344482,
+      "learning_rate": 0.00014831583923105,
+      "loss": 0.8734,
+      "step": 35
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 2.8926355838775635,
+      "learning_rate": 0.00012500000000000006,
+      "loss": 0.8668,
+      "step": 36
+    },
+    {
+      "epoch": 4.121212121212121,
+      "grad_norm": 0.9145299792289734,
+      "learning_rate": 0.00010305368692688174,
+      "loss": 0.2851,
+      "step": 37
+    },
+    {
+      "epoch": 4.242424242424242,
+      "grad_norm": 0.7148826718330383,
+      "learning_rate": 8.271734841028553e-05,
+      "loss": 0.6555,
+      "step": 38
+    },
+    {
+      "epoch": 4.363636363636363,
+      "grad_norm": 1.015910267829895,
+      "learning_rate": 6.421379363065141e-05,
+      "loss": 0.3664,
+      "step": 39
+    },
+    {
+      "epoch": 4.484848484848484,
+      "grad_norm": 0.9201410412788391,
+      "learning_rate": 4.7745751406263163e-05,
+      "loss": 0.575,
+      "step": 40
+    },
+    {
+      "epoch": 4.606060606060606,
+      "grad_norm": 0.8212230801582336,
+      "learning_rate": 3.3493649053890325e-05,
+      "loss": 0.4084,
+      "step": 41
+    },
+    {
+      "epoch": 4.7272727272727275,
+      "grad_norm": 0.8163782358169556,
+      "learning_rate": 2.1613635589349755e-05,
+      "loss": 0.3754,
+      "step": 42
+    },
+    {
+      "epoch": 4.848484848484849,
+      "grad_norm": 0.7215772867202759,
+      "learning_rate": 1.2235870926211617e-05,
+      "loss": 0.2209,
+      "step": 43
+    },
+    {
+      "epoch": 4.96969696969697,
+      "grad_norm": 1.07026207447052,
+      "learning_rate": 5.463099816548578e-06,
+      "loss": 0.3138,
+      "step": 44
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2267168340344832.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null