Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +284 -4

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f38f6c7cc699891d33b43936e13424bf4f5c79f2e529aeddd1d38cb2b4e18803
 size 359270696

 version https://git-lfs.github.com/spec/v1
+oid sha256:26c9ef3202a5fecea3cf4e8797de14edaf72f05c99e7339f204d4ae79fc4059e
 size 359270696

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d10e91706fa7e2ba3d059b9c546fae7154a67c360412992451723c1d1fe5cc02
 size 718831691

 version https://git-lfs.github.com/spec/v1
+oid sha256:d57a71f32bc02f77680270598e164ef53cf521b9842a915012f167967134ca03
 size 718831691

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:198cd7bfd4cc7ff1f8428350e0d8215b1fdfe0f5fb7d8e52ae1940a8f9a2b85b
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3d312928d0bb60518eb9856d5ab0ae1674bcb745294bf27f615cb6d07b0463e
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f9d102cc71e10314031dfa77900e5dce67d2852876159da43d82b7787c5824f
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:5418e0fc9a3a6a50ea3a7b440dfb8b2fa26686b28c8f28256150a09922035962
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -428,6 +428,286 @@
       "learning_rate": 0.00023614713127100752,
       "loss": 0.5813,
       "step": 60
     }
   ],
   "logging_steps": 1,
@@ -442,12 +722,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.746175843467264e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00023614713127100752,
       "loss": 0.5813,
       "step": 60
+    },
+    {
+      "epoch": 3.050632911392405,
+      "grad_norm": 2.65234112739563,
+      "learning_rate": 0.00022693291013417452,
+      "loss": 0.7218,
+      "step": 61
+    },
+    {
+      "epoch": 3.1012658227848102,
+      "grad_norm": 1.9882073402404785,
+      "learning_rate": 0.00021775019586744925,
+      "loss": 0.5474,
+      "step": 62
+    },
+    {
+      "epoch": 3.151898734177215,
+      "grad_norm": 2.5399203300476074,
+      "learning_rate": 0.0002086115309539675,
+      "loss": 0.7906,
+      "step": 63
+    },
+    {
+      "epoch": 3.2025316455696204,
+      "grad_norm": 1.6655718088150024,
+      "learning_rate": 0.0001995293977107475,
+      "loss": 0.3726,
+      "step": 64
+    },
+    {
+      "epoch": 3.2531645569620253,
+      "grad_norm": 2.330378770828247,
+      "learning_rate": 0.00019051620123934537,
+      "loss": 0.6493,
+      "step": 65
+    },
+    {
+      "epoch": 3.3037974683544302,
+      "grad_norm": 2.360882043838501,
+      "learning_rate": 0.0001815842524819793,
+      "loss": 0.6553,
+      "step": 66
+    },
+    {
+      "epoch": 3.3544303797468356,
+      "grad_norm": 2.726463556289673,
+      "learning_rate": 0.00017274575140626317,
+      "loss": 0.7101,
+      "step": 67
+    },
+    {
+      "epoch": 3.4050632911392404,
+      "grad_norm": 2.313028573989868,
+      "learning_rate": 0.00016401277034151795,
+      "loss": 0.773,
+      "step": 68
+    },
+    {
+      "epoch": 3.4556962025316453,
+      "grad_norm": 2.5557079315185547,
+      "learning_rate": 0.00015539723748942243,
+      "loss": 0.7098,
+      "step": 69
+    },
+    {
+      "epoch": 3.5063291139240507,
+      "grad_norm": 2.11527681350708,
+      "learning_rate": 0.00014691092063152418,
+      "loss": 0.5224,
+      "step": 70
+    },
+    {
+      "epoch": 3.5569620253164556,
+      "grad_norm": 2.06489634513855,
+      "learning_rate": 0.00013856541105586545,
+      "loss": 0.565,
+      "step": 71
+    },
+    {
+      "epoch": 3.607594936708861,
+      "grad_norm": 2.3769450187683105,
+      "learning_rate": 0.0001303721077246784,
+      "loss": 0.5167,
+      "step": 72
+    },
+    {
+      "epoch": 3.6582278481012658,
+      "grad_norm": 3.1446056365966797,
+      "learning_rate": 0.0001223422017047733,
+      "loss": 0.738,
+      "step": 73
+    },
+    {
+      "epoch": 3.708860759493671,
+      "grad_norm": 2.9032044410705566,
+      "learning_rate": 0.00011448666088188764,
+      "loss": 0.84,
+      "step": 74
+    },
+    {
+      "epoch": 3.759493670886076,
+      "grad_norm": 2.6407244205474854,
+      "learning_rate": 0.00010681621497987371,
+      "loss": 0.55,
+      "step": 75
+    },
+    {
+      "epoch": 3.810126582278481,
+      "grad_norm": 2.3889331817626953,
+      "learning_rate": 9.934134090518593e-05,
+      "loss": 0.6036,
+      "step": 76
+    },
+    {
+      "epoch": 3.8607594936708862,
+      "grad_norm": 2.591595411300659,
+      "learning_rate": 9.207224843668733e-05,
+      "loss": 0.4689,
+      "step": 77
+    },
+    {
+      "epoch": 3.911392405063291,
+      "grad_norm": 2.4760868549346924,
+      "learning_rate": 8.50188662803194e-05,
+      "loss": 0.4915,
+      "step": 78
+    },
+    {
+      "epoch": 3.962025316455696,
+      "grad_norm": 2.930954694747925,
+      "learning_rate": 7.819082850768433e-05,
+      "loss": 0.8307,
+      "step": 79
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 2.1440188884735107,
+      "learning_rate": 7.159746139706194e-05,
+      "loss": 0.4358,
+      "step": 80
+    },
+    {
+      "epoch": 4.050632911392405,
+      "grad_norm": 1.4254531860351562,
+      "learning_rate": 6.524777069483526e-05,
+      "loss": 0.2303,
+      "step": 81
+    },
+    {
+      "epoch": 4.10126582278481,
+      "grad_norm": 2.0634138584136963,
+      "learning_rate": 5.9150429314724254e-05,
+      "loss": 0.5029,
+      "step": 82
+    },
+    {
+      "epoch": 4.151898734177215,
+      "grad_norm": 1.7933154106140137,
+      "learning_rate": 5.3313765491629194e-05,
+      "loss": 0.3326,
+      "step": 83
+    },
+    {
+      "epoch": 4.2025316455696204,
+      "grad_norm": 1.1411280632019043,
+      "learning_rate": 4.7745751406263163e-05,
+      "loss": 0.1651,
+      "step": 84
+    },
+    {
+      "epoch": 4.253164556962025,
+      "grad_norm": 1.6106775999069214,
+      "learning_rate": 4.245399229611238e-05,
+      "loss": 0.3276,
+      "step": 85
+    },
+    {
+      "epoch": 4.30379746835443,
+      "grad_norm": 1.6101868152618408,
+      "learning_rate": 3.7445716067596506e-05,
+      "loss": 0.3073,
+      "step": 86
+    },
+    {
+      "epoch": 4.3544303797468356,
+      "grad_norm": 1.1931700706481934,
+      "learning_rate": 3.2727763423617915e-05,
+      "loss": 0.1378,
+      "step": 87
+    },
+    {
+      "epoch": 4.405063291139241,
+      "grad_norm": 1.7510262727737427,
+      "learning_rate": 2.8306578519984528e-05,
+      "loss": 0.2117,
+      "step": 88
+    },
+    {
+      "epoch": 4.455696202531645,
+      "grad_norm": 1.7942403554916382,
+      "learning_rate": 2.4188200163467787e-05,
+      "loss": 0.1961,
+      "step": 89
+    },
+    {
+      "epoch": 4.506329113924051,
+      "grad_norm": 1.7418352365493774,
+      "learning_rate": 2.0378253563519245e-05,
+      "loss": 0.2915,
+      "step": 90
+    },
+    {
+      "epoch": 4.556962025316456,
+      "grad_norm": 1.7192072868347168,
+      "learning_rate": 1.6881942648911074e-05,
+      "loss": 0.1873,
+      "step": 91
+    },
+    {
+      "epoch": 4.6075949367088604,
+      "grad_norm": 1.5197887420654297,
+      "learning_rate": 1.3704042959795133e-05,
+      "loss": 0.3229,
+      "step": 92
+    },
+    {
+      "epoch": 4.658227848101266,
+      "grad_norm": 1.1745399236679077,
+      "learning_rate": 1.0848895124889818e-05,
+      "loss": 0.1945,
+      "step": 93
+    },
+    {
+      "epoch": 4.708860759493671,
+      "grad_norm": 1.7969136238098145,
+      "learning_rate": 8.320398932703144e-06,
+      "loss": 0.2555,
+      "step": 94
+    },
+    {
+      "epoch": 4.759493670886076,
+      "grad_norm": 1.9828176498413086,
+      "learning_rate": 6.12200800489085e-06,
+      "loss": 0.2931,
+      "step": 95
+    },
+    {
+      "epoch": 4.810126582278481,
+      "grad_norm": 1.6430472135543823,
+      "learning_rate": 4.256725079024554e-06,
+      "loss": 0.1705,
+      "step": 96
+    },
+    {
+      "epoch": 4.860759493670886,
+      "grad_norm": 1.5796780586242676,
+      "learning_rate": 2.7270979072135106e-06,
+      "loss": 0.2818,
+      "step": 97
+    },
+    {
+      "epoch": 4.911392405063291,
+      "grad_norm": 1.7523950338363647,
+      "learning_rate": 1.5352157761815977e-06,
+      "loss": 0.2395,
+      "step": 98
+    },
+    {
+      "epoch": 4.962025316455696,
+      "grad_norm": 1.9358466863632202,
+      "learning_rate": 6.827066535529947e-07,
+      "loss": 0.3112,
+      "step": 99
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 2.1313531398773193,
+      "learning_rate": 1.7073496424427348e-07,
+      "loss": 0.102,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.9265650012307456e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null