train_residue_list_lr_5e-4_5_epochs

Browse files

Files changed (11) hide show

README.md +17 -22
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
all_results.json +11 -11
eval_results.json +6 -6
model_eval_results.csv +0 -0
trainer_log.jsonl +0 -0
trainer_state.json +0 -0
training_args.bin +1 -1
training_eval_loss.png +2 -2
training_loss.png +2 -2

README.md CHANGED Viewed

@@ -7,19 +7,19 @@ tags:
 - lora
 - generated_from_trainer
 model-index:
-- name: train_residue_list_lr_3e-4_5_epochs_neg_1
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# train_residue_list_lr_3e-4_5_epochs_neg_1
 This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2438
-- Num Input Tokens Seen: 18724952
 ## Model description
@@ -38,7 +38,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0003
 - train_batch_size: 16
 - eval_batch_size: 16
 - seed: 42
@@ -53,27 +53,22 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Input Tokens Seen |
 |:-------------:|:------:|:----:|:---------------:|:-----------------:|
-| 0.4277        | 0.3225 | 100  | 0.4275          | 1212416           |
-| 0.3824        | 0.6449 | 200  | 0.3592          | 2422016           |
-| 0.3314        | 0.9674 | 300  | 0.3160          | 3633408           |
-| 0.2962        | 1.2870 | 400  | 0.2967          | 4832344           |
-| 0.2829        | 1.6094 | 500  | 0.2788          | 6041304           |
-| 0.2524        | 1.9319 | 600  | 0.2397          | 7252056           |
-| 0.2185        | 2.2515 | 700  | 0.2294          | 8452392           |
-| 0.2162        | 2.5740 | 800  | 0.2240          | 9665576           |
-| 0.1905        | 2.8964 | 900  | 0.2098          | 10875816          |
-| 0.1481        | 3.2160 | 1000 | 0.2189          | 12076160          |
-| 0.15          | 3.5385 | 1100 | 0.2087          | 13288832          |
-| 0.1387        | 3.8609 | 1200 | 0.1994          | 14501632          |
-| 0.0735        | 4.1806 | 1300 | 0.2397          | 15699672          |
-| 0.0701        | 4.5030 | 1400 | 0.2403          | 16908632          |
-| 0.0681        | 4.8255 | 1500 | 0.2435          | 18117848          |
 ### Framework versions
 - PEFT 0.14.0
-- Transformers 4.51.1
 - Pytorch 2.3.1+cu121
 - Datasets 3.5.0
-- Tokenizers 0.21.1

 - lora
 - generated_from_trainer
 model-index:
+- name: train_residue_list_lr_5e-4_5_epochs
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# train_residue_list_lr_5e-4_5_epochs
 This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5254
+- Num Input Tokens Seen: 13416448
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0005
 - train_batch_size: 16
 - eval_batch_size: 16
 - seed: 42
 | Training Loss | Epoch  | Step | Validation Loss | Input Tokens Seen |
 |:-------------:|:------:|:----:|:---------------:|:-----------------:|
+| 2.5191        | 0.4561 | 100  | 2.8151          | 1230464           |
+| 0.7346        | 0.9122 | 200  | 0.6728          | 2455680           |
+| 1.9608        | 1.3649 | 300  | 1.7802          | 3673744           |
+| 3.2759        | 1.8210 | 400  | 1.0581          | 4901648           |
+| 0.5697        | 2.2737 | 500  | 0.5570          | 6120272           |
+| 0.5411        | 2.7298 | 600  | 0.5424          | 7348944           |
+| 0.5415        | 3.1824 | 700  | 0.5388          | 8570128           |
+| 0.5477        | 3.6385 | 800  | 0.5335          | 9801744           |
+| 0.5305        | 4.0912 | 900  | 0.5296          | 11019520          |
+| 0.5281        | 4.5473 | 1000 | 0.5262          | 12247168          |
 ### Framework versions
 - PEFT 0.14.0
+- Transformers 4.48.3
 - Pytorch 2.3.1+cu121
 - Datasets 3.5.0
+- Tokenizers 0.21.0

adapter_config.json CHANGED Viewed

@@ -24,12 +24,12 @@
   "revision": null,
   "target_modules": [
     "k_proj",
-    "gate_proj",
     "v_proj",
     "o_proj",
     "q_proj",
-    "down_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "k_proj",
     "v_proj",
+    "gate_proj",
     "o_proj",
+    "up_proj",
     "q_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66eb45d88a880716d3f14ec5b62744622e04f5cff153b98ebd4c0ad10cc5b59e
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1b1cff0d311ec71991a98b2179ddea7adbb3fb733f9272b15e6da197193517a
 size 639691872

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 4.986698911729142,
-    "eval_loss": 0.243755504488945,
-    "eval_runtime": 46.4843,
-    "eval_samples_per_second": 94.871,
-    "eval_steps_per_second": 5.937,
-    "num_input_tokens_seen": 18724952,
-    "total_flos": 7.602987521131807e+17,
-    "train_loss": 0.2932651937200177,
-    "train_runtime": 7879.3287,
-    "train_samples_per_second": 25.18,
-    "train_steps_per_second": 0.197
 }

 {
+    "epoch": 4.980615735461802,
+    "eval_loss": 0.5253703594207764,
+    "eval_runtime": 33.4842,
+    "eval_samples_per_second": 93.118,
+    "eval_steps_per_second": 5.824,
+    "num_input_tokens_seen": 13416448,
+    "total_flos": 5.447548635740897e+17,
+    "train_loss": 1.1898179768427322,
+    "train_runtime": 5468.9169,
+    "train_samples_per_second": 25.652,
+    "train_steps_per_second": 0.2
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 4.986698911729142,
-    "eval_loss": 0.243755504488945,
-    "eval_runtime": 46.4843,
-    "eval_samples_per_second": 94.871,
-    "eval_steps_per_second": 5.937,
-    "num_input_tokens_seen": 18724952
 }

 {
+    "epoch": 4.980615735461802,
+    "eval_loss": 0.5253703594207764,
+    "eval_runtime": 33.4842,
+    "eval_samples_per_second": 93.118,
+    "eval_steps_per_second": 5.824,
+    "num_input_tokens_seen": 13416448
 }