Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +127 -162
training_args.bin +1 -1

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22e61a7a8de9f77bde779dfb846c91806787ab268e54083d22ef514071bb5f82
 size 4991031824

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fdc69c9532d50d8cbcfc7357cd186c5f961216500c340a60e088c6248848814
 size 4991031824

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb2a023ee138db84cb938d952170f44e13d54562e07e0156b3aeab57d754461a
 size 1610725592

 version https://git-lfs.github.com/spec/v1
+oid sha256:5acc5280e967a03411a879cc37d8f4c9b24d1d8a7df4b74a0ab654ca2b2308b4
 size 1610725592

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7f94ceb272f725414956e963487ef397fff2499b3207027f08016ee0e30d1ae
-size 13203681623

 version https://git-lfs.github.com/spec/v1
+oid sha256:2acac140f381a83b89f9d555cc696d4840505d66eeeaf95bf3c42656fe32c150
+size 13203678103

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c19bab1174704a4a4441475683bf1270277af15d2e2c95e964789128e482c4
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15db80273eb922131fee165eee31e9743cd2224399faebd1ef9e6addce818d49
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:993186ddfb6142501973771452980601dfb89818da88b2abc705869965101f78
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,264 +2,229 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1784,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.028026905829596414,
-      "grad_norm": 9.9375,
-      "learning_rate": 4.862668161434978e-05,
-      "loss": 5.2794,
       "step": 50
     },
     {
-      "epoch": 0.05605381165919283,
-      "grad_norm": 16.125,
-      "learning_rate": 4.7225336322869954e-05,
-      "loss": 5.2112,
       "step": 100
     },
     {
-      "epoch": 0.08408071748878924,
-      "grad_norm": 7.5,
-      "learning_rate": 4.5823991031390135e-05,
-      "loss": 5.1903,
       "step": 150
     },
     {
-      "epoch": 0.11210762331838565,
-      "grad_norm": 8.25,
-      "learning_rate": 4.442264573991032e-05,
-      "loss": 5.1039,
       "step": 200
     },
     {
-      "epoch": 0.14013452914798205,
-      "grad_norm": 7.6875,
-      "learning_rate": 4.30213004484305e-05,
-      "loss": 5.1759,
       "step": 250
     },
     {
-      "epoch": 0.1681614349775785,
-      "grad_norm": 6.875,
-      "learning_rate": 4.161995515695067e-05,
-      "loss": 5.1191,
       "step": 300
     },
     {
-      "epoch": 0.1961883408071749,
-      "grad_norm": 9.0625,
-      "learning_rate": 4.0218609865470855e-05,
-      "loss": 5.1635,
       "step": 350
     },
     {
-      "epoch": 0.2242152466367713,
-      "grad_norm": 7.34375,
-      "learning_rate": 3.8817264573991036e-05,
-      "loss": 5.0679,
       "step": 400
     },
     {
-      "epoch": 0.2522421524663677,
-      "grad_norm": 7.65625,
-      "learning_rate": 3.741591928251121e-05,
-      "loss": 4.9766,
       "step": 450
     },
     {
-      "epoch": 0.2802690582959641,
-      "grad_norm": 11.0,
-      "learning_rate": 3.601457399103139e-05,
-      "loss": 5.0054,
       "step": 500
     },
     {
-      "epoch": 0.30829596412556054,
-      "grad_norm": 9.0,
-      "learning_rate": 3.461322869955157e-05,
-      "loss": 5.0283,
       "step": 550
     },
     {
-      "epoch": 0.336322869955157,
-      "grad_norm": 9.875,
-      "learning_rate": 3.321188340807175e-05,
-      "loss": 5.013,
       "step": 600
     },
     {
-      "epoch": 0.36434977578475336,
-      "grad_norm": 7.75,
-      "learning_rate": 3.181053811659193e-05,
-      "loss": 4.9833,
       "step": 650
     },
     {
-      "epoch": 0.3923766816143498,
-      "grad_norm": 10.0,
-      "learning_rate": 3.040919282511211e-05,
-      "loss": 5.0368,
       "step": 700
     },
     {
-      "epoch": 0.4204035874439462,
-      "grad_norm": 9.4375,
-      "learning_rate": 2.9007847533632287e-05,
-      "loss": 4.9734,
       "step": 750
     },
     {
-      "epoch": 0.4484304932735426,
-      "grad_norm": 11.25,
-      "learning_rate": 2.7606502242152465e-05,
-      "loss": 4.9578,
       "step": 800
     },
     {
-      "epoch": 0.476457399103139,
-      "grad_norm": 8.75,
-      "learning_rate": 2.620515695067265e-05,
-      "loss": 4.9768,
       "step": 850
     },
     {
-      "epoch": 0.5044843049327354,
-      "grad_norm": 10.25,
-      "learning_rate": 2.480381165919283e-05,
-      "loss": 4.897,
       "step": 900
     },
     {
-      "epoch": 0.5325112107623319,
-      "grad_norm": 9.0625,
-      "learning_rate": 2.3402466367713007e-05,
-      "loss": 4.8971,
       "step": 950
     },
     {
-      "epoch": 0.5605381165919282,
-      "grad_norm": 6.875,
-      "learning_rate": 2.2001121076233185e-05,
-      "loss": 4.8569,
       "step": 1000
     },
     {
-      "epoch": 0.5885650224215246,
-      "grad_norm": 8.9375,
-      "learning_rate": 2.0599775784753363e-05,
-      "loss": 4.8683,
       "step": 1050
     },
     {
-      "epoch": 0.6165919282511211,
-      "grad_norm": 8.125,
-      "learning_rate": 1.9198430493273544e-05,
-      "loss": 4.9153,
       "step": 1100
     },
     {
-      "epoch": 0.6446188340807175,
-      "grad_norm": 7.4375,
-      "learning_rate": 1.7797085201793723e-05,
-      "loss": 4.9176,
       "step": 1150
     },
     {
-      "epoch": 0.672645739910314,
-      "grad_norm": 7.375,
-      "learning_rate": 1.63957399103139e-05,
-      "loss": 4.884,
       "step": 1200
     },
     {
-      "epoch": 0.7006726457399103,
-      "grad_norm": 7.6875,
-      "learning_rate": 1.4994394618834082e-05,
-      "loss": 4.9137,
       "step": 1250
     },
     {
-      "epoch": 0.7286995515695067,
-      "grad_norm": 7.34375,
-      "learning_rate": 1.359304932735426e-05,
-      "loss": 4.8802,
       "step": 1300
     },
     {
-      "epoch": 0.7567264573991032,
-      "grad_norm": 9.25,
-      "learning_rate": 1.219170403587444e-05,
-      "loss": 4.9338,
       "step": 1350
     },
     {
-      "epoch": 0.7847533632286996,
-      "grad_norm": 10.375,
-      "learning_rate": 1.079035874439462e-05,
-      "loss": 4.9095,
       "step": 1400
     },
     {
-      "epoch": 0.8127802690582959,
-      "grad_norm": 7.15625,
-      "learning_rate": 9.389013452914798e-06,
-      "loss": 4.9246,
       "step": 1450
     },
     {
-      "epoch": 0.8408071748878924,
-      "grad_norm": 6.34375,
-      "learning_rate": 7.987668161434977e-06,
-      "loss": 4.8827,
       "step": 1500
-    },
-    {
-      "epoch": 0.8688340807174888,
-      "grad_norm": 7.0,
-      "learning_rate": 6.5863228699551565e-06,
-      "loss": 4.9036,
-      "step": 1550
-    },
-    {
-      "epoch": 0.8968609865470852,
-      "grad_norm": 10.0,
-      "learning_rate": 5.184977578475336e-06,
-      "loss": 4.8653,
-      "step": 1600
-    },
-    {
-      "epoch": 0.9248878923766816,
-      "grad_norm": 7.46875,
-      "learning_rate": 3.783632286995516e-06,
-      "loss": 4.8676,
-      "step": 1650
-    },
-    {
-      "epoch": 0.952914798206278,
-      "grad_norm": 8.25,
-      "learning_rate": 2.3822869955156952e-06,
-      "loss": 4.8952,
-      "step": 1700
-    },
-    {
-      "epoch": 0.9809417040358744,
-      "grad_norm": 6.3125,
-      "learning_rate": 9.809417040358745e-07,
-      "loss": 4.9213,
-      "step": 1750
     }
   ],
   "logging_steps": 50,
-  "max_steps": 1784,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
-  "save_steps": 5000,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -267,12 +232,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.5271922632402944e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.3632286995515694,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.11210762331838565,
+      "grad_norm": 4.625,
+      "learning_rate": 9.14179104477612e-06,
+      "loss": 5.492,
       "step": 50
     },
     {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 4.90625,
+      "learning_rate": 1.8470149253731344e-05,
+      "loss": 5.439,
       "step": 100
     },
     {
+      "epoch": 0.336322869955157,
+      "grad_norm": 3.84375,
+      "learning_rate": 2.7798507462686568e-05,
+      "loss": 5.2502,
       "step": 150
     },
     {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 6.59375,
+      "learning_rate": 3.7126865671641795e-05,
+      "loss": 5.161,
       "step": 200
     },
     {
+      "epoch": 0.5605381165919282,
+      "grad_norm": 6.25,
+      "learning_rate": 4.645522388059701e-05,
+      "loss": 5.0548,
       "step": 250
     },
     {
+      "epoch": 0.672645739910314,
+      "grad_norm": 6.71875,
+      "learning_rate": 4.999325361589072e-05,
+      "loss": 5.0148,
       "step": 300
     },
     {
+      "epoch": 0.7847533632286996,
+      "grad_norm": 5.6875,
+      "learning_rate": 4.9953952730494324e-05,
+      "loss": 5.0028,
       "step": 350
     },
     {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.987961816680492e-05,
+      "loss": 4.9576,
       "step": 400
     },
     {
+      "epoch": 1.0089686098654709,
+      "grad_norm": 4.375,
+      "learning_rate": 4.977035428557125e-05,
+      "loss": 4.929,
       "step": 450
     },
     {
+      "epoch": 1.1210762331838564,
+      "grad_norm": 4.78125,
+      "learning_rate": 4.9626314485964385e-05,
+      "loss": 4.7677,
       "step": 500
     },
     {
+      "epoch": 1.2331838565022422,
+      "grad_norm": 4.875,
+      "learning_rate": 4.944770099021562e-05,
+      "loss": 4.7483,
       "step": 550
     },
     {
+      "epoch": 1.3452914798206277,
+      "grad_norm": 5.9375,
+      "learning_rate": 4.923476455971e-05,
+      "loss": 4.7362,
       "step": 600
     },
     {
+      "epoch": 1.4573991031390134,
+      "grad_norm": 7.0625,
+      "learning_rate": 4.898780414293411e-05,
+      "loss": 4.7181,
       "step": 650
     },
     {
+      "epoch": 1.5695067264573992,
+      "grad_norm": 4.1875,
+      "learning_rate": 4.870716645577244e-05,
+      "loss": 4.719,
       "step": 700
     },
     {
+      "epoch": 1.6816143497757847,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.839324549474148e-05,
+      "loss": 4.727,
       "step": 750
     },
     {
+      "epoch": 1.7937219730941703,
+      "grad_norm": 4.46875,
+      "learning_rate": 4.804648198384507e-05,
+      "loss": 4.7355,
       "step": 800
     },
     {
+      "epoch": 1.905829596412556,
+      "grad_norm": 4.59375,
+      "learning_rate": 4.7667362755827306e-05,
+      "loss": 4.7111,
       "step": 850
     },
     {
+      "epoch": 2.0179372197309418,
+      "grad_norm": 4.15625,
+      "learning_rate": 4.725642006869207e-05,
+      "loss": 4.6229,
       "step": 900
     },
     {
+      "epoch": 2.1300448430493275,
+      "grad_norm": 5.53125,
+      "learning_rate": 4.68142308584484e-05,
+      "loss": 4.3583,
       "step": 950
     },
     {
+      "epoch": 2.242152466367713,
+      "grad_norm": 3.875,
+      "learning_rate": 4.634141592913097e-05,
+      "loss": 4.3665,
       "step": 1000
     },
     {
+      "epoch": 2.3542600896860986,
+      "grad_norm": 3.890625,
+      "learning_rate": 4.583863908123282e-05,
+      "loss": 4.4125,
       "step": 1050
     },
     {
+      "epoch": 2.4663677130044843,
+      "grad_norm": 4.21875,
+      "learning_rate": 4.530660617977393e-05,
+      "loss": 4.3592,
       "step": 1100
     },
     {
+      "epoch": 2.57847533632287,
+      "grad_norm": 4.5,
+      "learning_rate": 4.474606416331397e-05,
+      "loss": 4.371,
       "step": 1150
     },
     {
+      "epoch": 2.6905829596412554,
+      "grad_norm": 4.3125,
+      "learning_rate": 4.415779999530064e-05,
+      "loss": 4.4046,
       "step": 1200
     },
     {
+      "epoch": 2.802690582959641,
+      "grad_norm": 4.21875,
+      "learning_rate": 4.354263955922568e-05,
+      "loss": 4.3805,
       "step": 1250
     },
     {
+      "epoch": 2.914798206278027,
+      "grad_norm": 4.15625,
+      "learning_rate": 4.290144649913973e-05,
+      "loss": 4.3829,
       "step": 1300
     },
     {
+      "epoch": 3.0269058295964126,
+      "grad_norm": 4.6875,
+      "learning_rate": 4.2235121007153975e-05,
+      "loss": 4.2465,
       "step": 1350
     },
     {
+      "epoch": 3.1390134529147984,
+      "grad_norm": 6.0,
+      "learning_rate": 4.1544598559630694e-05,
+      "loss": 3.8933,
       "step": 1400
     },
     {
+      "epoch": 3.2511210762331837,
+      "grad_norm": 4.875,
+      "learning_rate": 4.083084860383708e-05,
+      "loss": 3.9146,
       "step": 1450
     },
     {
+      "epoch": 3.3632286995515694,
+      "grad_norm": 4.6875,
+      "learning_rate": 4.009487319690626e-05,
+      "loss": 3.9086,
       "step": 1500
     }
   ],
   "logging_steps": 50,
+  "max_steps": 4460,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.135152845033677e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14b8e7fa23f8365e97ec329503b35572e8528d5f7cac5d82eacf16bcc34bdde3
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:b07c97d6cade78ad89a0fb0e3020fbd76a7fe36e45ae68b0391dd5d87dad3625
 size 5777