Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

README.md +0 -13
optimizer.pt +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +177 -27

README.md CHANGED Viewed

@@ -4,18 +4,6 @@ library_name: peft
 ## Training procedure
-The following `bitsandbytes` quantization config was used during training:
-- quant_method: bitsandbytes
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: fp4
-- bnb_4bit_use_double_quant: False
-- bnb_4bit_compute_dtype: float16
 The following `bitsandbytes` quantization config was used during training:
 - quant_method: bitsandbytes
 - load_in_8bit: False
@@ -29,6 +17,5 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_compute_dtype: float16
 ### Framework versions
-- PEFT 0.4.0
 - PEFT 0.4.0

 ## Training procedure
 The following `bitsandbytes` quantization config was used during training:
 - quant_method: bitsandbytes
 - load_in_8bit: False
 - bnb_4bit_compute_dtype: float16
 ### Framework versions
 - PEFT 0.4.0

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:441ed8437ef696fca88f87327b562c9340116b1441f6c8d2c8d50fc293f0f392
-size 63564410

 version https://git-lfs.github.com/spec/v1
+oid sha256:868f5d86fc275789c060248405ac631b57ec129ab94e0620f2afcb7c25733ddd
+size 16308576

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:875acd07fbaa252f994f5aa2f25ef1f4bdd0643009b8c06c5b0ae9b034919328
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:64d2339224b4468b2fd7540559183bd139477eaccc037e756e8392f83c137ce5
 size 14244

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2b9a4abb47b1f7cc1dfd2e4035b39611a01db8ddaa04fd2cde539012db4dec1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:65e28920f1b4bd7ff8778a8ef5cd8795cbe7056eb9339aefdd67bef14c405f19
 size 1064

trainer_state.json CHANGED Viewed

@@ -3,65 +3,215 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 4420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.11,
-      "learning_rate": 0.00019375130200295878,
-      "loss": 0.4073,
       "step": 500
     },
     {
-      "epoch": 0.23,
-      "learning_rate": 0.0001757861325449997,
-      "loss": 0.4069,
       "step": 1000
     },
     {
-      "epoch": 0.34,
-      "learning_rate": 0.00014834966999429178,
-      "loss": 0.4009,
       "step": 1500
     },
     {
-      "epoch": 0.45,
-      "learning_rate": 0.00011487075772256517,
-      "loss": 0.3973,
       "step": 2000
     },
     {
-      "epoch": 0.57,
-      "learning_rate": 7.953338797092902e-05,
-      "loss": 0.3955,
       "step": 2500
     },
     {
-      "epoch": 0.68,
-      "learning_rate": 4.6753811771138364e-05,
-      "loss": 0.3905,
       "step": 3000
     },
     {
-      "epoch": 0.79,
-      "learning_rate": 2.062862256606306e-05,
-      "loss": 0.3881,
       "step": 3500
     },
     {
-      "epoch": 0.9,
-      "learning_rate": 4.422788704864633e-06,
-      "loss": 0.3841,
       "step": 4000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 4420,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos": 3.644311294921707e+17,
   "trial_name": null,
   "trial_params": null
 }

   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 16844,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.03,
+      "learning_rate": 0.00019956548524376711,
+      "loss": 0.3719,
       "step": 500
     },
     {
+      "epoch": 0.06,
+      "learning_rate": 0.0001982657170365362,
+      "loss": 0.3765,
       "step": 1000
     },
     {
+      "epoch": 0.09,
+      "learning_rate": 0.00019611199074762167,
+      "loss": 0.3756,
       "step": 1500
     },
     {
+      "epoch": 0.12,
+      "learning_rate": 0.000193123022894092,
+      "loss": 0.3751,
       "step": 2000
     },
     {
+      "epoch": 0.15,
+      "learning_rate": 0.00018932478848871238,
+      "loss": 0.376,
       "step": 2500
     },
     {
+      "epoch": 0.18,
+      "learning_rate": 0.00018475029530941827,
+      "loss": 0.3765,
       "step": 3000
     },
     {
+      "epoch": 0.21,
+      "learning_rate": 0.00017943929705198342,
+      "loss": 0.3737,
       "step": 3500
     },
     {
+      "epoch": 0.24,
+      "learning_rate": 0.00017343794785867154,
+      "loss": 0.37,
       "step": 4000
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.00016679840122511857,
+      "loss": 0.3791,
+      "step": 4500
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00015957835677106406,
+      "loss": 0.3706,
+      "step": 5000
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00015184055881362684,
+      "loss": 0.3785,
+      "step": 5500
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00014365225110067207,
+      "loss": 0.3701,
+      "step": 6000
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.00013508459244279678,
+      "loss": 0.3733,
+      "step": 6500
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00012621203832226526,
+      "loss": 0.3713,
+      "step": 7000
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00011711169385289445,
+      "loss": 0.3731,
+      "step": 7500
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00010786264371385917,
+      "loss": 0.377,
+      "step": 8000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 9.854526488049042e-05,
+      "loss": 0.3731,
+      "step": 8500
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 8.924052812463844e-05,
+      "loss": 0.3762,
+      "step": 9000
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 8.002929435476878e-05,
+      "loss": 0.3777,
+      "step": 9500
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 7.099161191080386e-05,
+      "loss": 0.3699,
+      "step": 10000
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 6.220602092042465e-05,
+      "loss": 0.3778,
+      "step": 10500
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 5.3748870762182066e-05,
+      "loss": 0.371,
+      "step": 11000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 4.5693656566864785e-05,
+      "loss": 0.3747,
+      "step": 11500
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 3.81103805231225e-05,
+      "loss": 0.3703,
+      "step": 12000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 3.1064943537786984e-05,
+      "loss": 0.3738,
+      "step": 12500
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 2.4618572537543038e-05,
+      "loss": 0.3739,
+      "step": 13000
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 1.882728838886583e-05,
+      "loss": 0.372,
+      "step": 13500
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 1.3741419060158056e-05,
+      "loss": 0.3716,
+      "step": 14000
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 9.405162256851662e-06,
+      "loss": 0.3698,
+      "step": 14500
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 5.8562013303037124e-06,
+      "loss": 0.3743,
+      "step": 15000
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 3.1253777983517363e-06,
+      "loss": 0.376,
+      "step": 15500
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 1.236423323421776e-06,
+      "loss": 0.3746,
+      "step": 16000
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 2.0575347737803452e-07,
+      "loss": 0.3738,
+      "step": 16500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 16844,
   "num_train_epochs": 1,
   "save_steps": 500,
+  "total_flos": 3.457848488949235e+17,
   "trial_name": null,
   "trial_params": null
 }