train_residue_list_lr_3e-4_5_epochs_neg_5

Browse files

Files changed (11) hide show

README.md +36 -16
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
all_results.json +11 -11
eval_results.json +6 -6
model_eval_results.csv +0 -0
trainer_log.jsonl +0 -0
trainer_state.json +0 -0
training_args.bin +2 -2
training_eval_loss.png +2 -2
training_loss.png +2 -2

README.md CHANGED Viewed

@@ -7,19 +7,19 @@ tags:
 - lora
 - generated_from_trainer
 model-index:
-- name: train_residue_list_lr_3e-4_5_epochs
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# train_residue_list_lr_3e-4_5_epochs
 This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2519
-- Num Input Tokens Seen: 13420336
 ## Model description
@@ -53,22 +53,42 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Input Tokens Seen |
 |:-------------:|:------:|:----:|:---------------:|:-----------------:|
-| 0.4552        | 0.4561 | 100  | 0.4630          | 1229824           |
-| 0.3728        | 0.9122 | 200  | 0.3762          | 2457344           |
-| 0.3172        | 1.3649 | 300  | 0.3195          | 3679728           |
-| 0.2906        | 1.8210 | 400  | 0.2807          | 4908144           |
-| 0.221         | 2.2737 | 500  | 0.2369          | 6131072           |
-| 0.2005        | 2.7298 | 600  | 0.2124          | 7358336           |
-| 0.1387        | 3.1824 | 700  | 0.2251          | 8576496           |
-| 0.1317        | 3.6385 | 800  | 0.2068          | 9805424           |
-| 0.0605        | 4.0912 | 900  | 0.2535          | 11023792          |
-| 0.0647        | 4.5473 | 1000 | 0.2509          | 12251056          |
 ### Framework versions
 - PEFT 0.14.0
-- Transformers 4.50.3
 - Pytorch 2.3.1+cu121
 - Datasets 3.5.0
-- Tokenizers 0.21.0

 - lora
 - generated_from_trainer
 model-index:
+- name: train_residue_list_lr_3e-4_5_epochs_neg_5
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# train_residue_list_lr_3e-4_5_epochs_neg_5
 This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1675
+- Num Input Tokens Seen: 36312064
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Input Tokens Seen |
 |:-------------:|:------:|:----:|:---------------:|:-----------------:|
+| 0.314         | 0.1617 | 100  | 0.3099          | 1177472           |
+| 1.0912        | 0.3235 | 200  | 0.2833          | 2353792           |
+| 0.2867        | 0.4852 | 300  | 0.2665          | 3529856           |
+| 0.2527        | 0.6470 | 400  | 0.2551          | 4704512           |
+| 0.251         | 0.8087 | 500  | 0.2453          | 5881088           |
+| 0.2157        | 0.9705 | 600  | 0.2286          | 7056896           |
+| 0.2341        | 1.1310 | 700  | 0.2221          | 8223104           |
+| 0.2017        | 1.2928 | 800  | 0.2096          | 9403904           |
+| 0.1898        | 1.4545 | 900  | 0.2038          | 10580480          |
+| 0.2013        | 1.6163 | 1000 | 0.2014          | 11757056          |
+| 0.2059        | 1.7780 | 1100 | 0.1935          | 12932352          |
+| 0.1938        | 1.9397 | 1200 | 0.1979          | 14108928          |
+| 0.1803        | 2.1003 | 1300 | 0.1866          | 15276800          |
+| 0.1628        | 2.2620 | 1400 | 0.1774          | 16452224          |
+| 0.178         | 2.4238 | 1500 | 0.1787          | 17630080          |
+| 0.1751        | 2.5855 | 1600 | 0.1708          | 18805632          |
+| 0.1678        | 2.7473 | 1700 | 0.1674          | 19982720          |
+| 0.1581        | 2.9090 | 1800 | 0.1618          | 21154816          |
+| 0.1348        | 3.0696 | 1900 | 0.1682          | 22320640          |
+| 0.1192        | 3.2313 | 2000 | 0.1588          | 23493120          |
+| 0.1323        | 3.3930 | 2100 | 0.1559          | 24670976          |
+| 0.1263        | 3.5548 | 2200 | 0.1524          | 25849728          |
+| 0.1155        | 3.7165 | 2300 | 0.1491          | 27029120          |
+| 0.1162        | 3.8783 | 2400 | 0.1464          | 28204928          |
+| 0.0687        | 4.0388 | 2500 | 0.1594          | 29370240          |
+| 0.0695        | 4.2006 | 2600 | 0.1708          | 30547712          |
+| 0.0631        | 4.3623 | 2700 | 0.1689          | 31720576          |
+| 0.0614        | 4.5241 | 2800 | 0.1685          | 32897024          |
+| 0.0692        | 4.6858 | 2900 | 0.1681          | 34075520          |
+| 0.0651        | 4.8476 | 3000 | 0.1680          | 35252352          |
 ### Framework versions
 - PEFT 0.14.0
+- Transformers 4.51.1
 - Pytorch 2.3.1+cu121
 - Datasets 3.5.0
+- Tokenizers 0.21.1

adapter_config.json CHANGED Viewed

@@ -24,12 +24,12 @@
   "revision": null,
   "target_modules": [
     "o_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj",
     "q_proj",
     "k_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "o_proj",
     "q_proj",
+    "gate_proj",
+    "up_proj",
     "k_proj",
+    "v_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b06d8e6f9e5a57088cc0ffb91f0185daaf8dd39f94cdc28b42e99bc4d955d04
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee1fd50f64de9fd8d7d8430bffb36a78e5e7892844e990686b97457a5e61c5a3
 size 639691872

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 4.980615735461802,
-    "eval_loss": 0.25186342000961304,
-    "eval_runtime": 33.1464,
-    "eval_samples_per_second": 94.068,
-    "eval_steps_per_second": 5.883,
-    "num_input_tokens_seen": 13420336,
-    "total_flos": 5.449127300160553e+17,
-    "train_loss": 0.3250706321844772,
-    "train_runtime": 5438.0568,
-    "train_samples_per_second": 25.798,
-    "train_steps_per_second": 0.201
 }

 {
+    "epoch": 4.993125758188435,
+    "eval_loss": 0.16754263639450073,
+    "eval_runtime": 91.5328,
+    "eval_samples_per_second": 96.064,
+    "eval_steps_per_second": 6.009,
+    "num_input_tokens_seen": 36312064,
+    "total_flos": 1.474397207846191e+18,
+    "train_loss": 0.2081744998500571,
+    "train_runtime": 16798.8058,
+    "train_samples_per_second": 23.554,
+    "train_steps_per_second": 0.184
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 4.980615735461802,
-    "eval_loss": 0.25186342000961304,
-    "eval_runtime": 33.1464,
-    "eval_samples_per_second": 94.068,
-    "eval_steps_per_second": 5.883,
-    "num_input_tokens_seen": 13420336
 }

 {
+    "epoch": 4.993125758188435,
+    "eval_loss": 0.16754263639450073,
+    "eval_runtime": 91.5328,
+    "eval_samples_per_second": 96.064,
+    "eval_steps_per_second": 6.009,
+    "num_input_tokens_seen": 36312064
 }