Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +59 -3

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e10d1a1f03706f449e3b2bdc8ebbf99c23a139de61252d02b890bf93f15030e
 size 59001752

 version https://git-lfs.github.com/spec/v1
+oid sha256:7353692ff981605c93cf22b096dbd246c5cf5bf5fc543b490cf3abeac82a7a31
 size 59001752

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1da90b91619dd51767af992201daf5098869400c2005e5f7e7e8a21dc66b546f
 size 118086731

 version https://git-lfs.github.com/spec/v1
+oid sha256:149be6cff081d78c7d5b1ec35545a1530746b35e6a028f2e35055b6d9fbeb68c
 size 118086731

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:859b15219f18ea947c283626d03dcbca0c6586c2c7533cd392aab98f051007c1
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a3365689e6d26d46f2bfa2d42b0b54103889b4f6a6bad2f60268c5b6325623d
 size 14645

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f8849200ab201085ca6ed3af3cd6f832415916a84b1e68395125ecc5ecd39e8
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:3656c0abce9576d0b5083188f2c7d6efcbee0134d27c94bb8b76920474ad16de
 size 1383

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02dff168b6030b33e9d563d15cebe08c9687f4919e4a5b24732fd416209a558e
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:bebaf2e9a58bbd47b2799ce9fd04cb408b13ca7015b9a1b4e2b2bcad498171ac
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011851676271462644,
   "eval_steps": 500,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -36,6 +36,62 @@
       "learning_rate": 0.0004941336809670538,
       "loss": 1.0972,
       "step": 200
     }
   ],
   "logging_steps": 50,
@@ -55,7 +111,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3711045558272e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.035555028814387934,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004941336809670538,
       "loss": 1.0972,
       "step": 200
+    },
+    {
+      "epoch": 0.014814595339328307,
+      "grad_norm": 0.07067917287349701,
+      "learning_rate": 0.0004926522872718654,
+      "loss": 1.1012,
+      "step": 250
+    },
+    {
+      "epoch": 0.017777514407193967,
+      "grad_norm": 0.12936587631702423,
+      "learning_rate": 0.0004911708935766769,
+      "loss": 1.0908,
+      "step": 300
+    },
+    {
+      "epoch": 0.02074043347505963,
+      "grad_norm": 0.09118826687335968,
+      "learning_rate": 0.0004896894998814885,
+      "loss": 1.0773,
+      "step": 350
+    },
+    {
+      "epoch": 0.02370335254292529,
+      "grad_norm": 0.08850109577178955,
+      "learning_rate": 0.00048820810618630005,
+      "loss": 1.1012,
+      "step": 400
+    },
+    {
+      "epoch": 0.02666627161079095,
+      "grad_norm": 0.07888604700565338,
+      "learning_rate": 0.00048672671249111167,
+      "loss": 1.1343,
+      "step": 450
+    },
+    {
+      "epoch": 0.029629190678656613,
+      "grad_norm": 0.0906878113746643,
+      "learning_rate": 0.00048524531879592323,
+      "loss": 1.1184,
+      "step": 500
+    },
+    {
+      "epoch": 0.032592109746522276,
+      "grad_norm": 0.07720430195331573,
+      "learning_rate": 0.0004837639251007348,
+      "loss": 1.0968,
+      "step": 550
+    },
+    {
+      "epoch": 0.035555028814387934,
+      "grad_norm": 0.0831717997789383,
+      "learning_rate": 0.00048228253140554636,
+      "loss": 1.0983,
+      "step": 600
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 4.1133136674816e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null