jbenbudd committed on
Commit
352c3ca
·
1 Parent(s): d52da83

train_residue_list_lr_5e-4_5_epochs

Browse files
README.md CHANGED
@@ -7,19 +7,19 @@ tags:
7
  - lora
8
  - generated_from_trainer
9
  model-index:
10
- - name: train_residue_list_lr_3e-4_5_epochs_neg_1
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # train_residue_list_lr_3e-4_5_epochs_neg_1
18
 
19
  This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2438
22
- - Num Input Tokens Seen: 18724952
23
 
24
  ## Model description
25
 
@@ -38,7 +38,7 @@ More information needed
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
- - learning_rate: 0.0003
42
  - train_batch_size: 16
43
  - eval_batch_size: 16
44
  - seed: 42
@@ -53,27 +53,22 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen |
55
  |:-------------:|:------:|:----:|:---------------:|:-----------------:|
56
- | 0.4277 | 0.3225 | 100 | 0.4275 | 1212416 |
57
- | 0.3824 | 0.6449 | 200 | 0.3592 | 2422016 |
58
- | 0.3314 | 0.9674 | 300 | 0.3160 | 3633408 |
59
- | 0.2962 | 1.2870 | 400 | 0.2967 | 4832344 |
60
- | 0.2829 | 1.6094 | 500 | 0.2788 | 6041304 |
61
- | 0.2524 | 1.9319 | 600 | 0.2397 | 7252056 |
62
- | 0.2185 | 2.2515 | 700 | 0.2294 | 8452392 |
63
- | 0.2162 | 2.5740 | 800 | 0.2240 | 9665576 |
64
- | 0.1905 | 2.8964 | 900 | 0.2098 | 10875816 |
65
- | 0.1481 | 3.2160 | 1000 | 0.2189 | 12076160 |
66
- | 0.15 | 3.5385 | 1100 | 0.2087 | 13288832 |
67
- | 0.1387 | 3.8609 | 1200 | 0.1994 | 14501632 |
68
- | 0.0735 | 4.1806 | 1300 | 0.2397 | 15699672 |
69
- | 0.0701 | 4.5030 | 1400 | 0.2403 | 16908632 |
70
- | 0.0681 | 4.8255 | 1500 | 0.2435 | 18117848 |
71
 
72
 
73
  ### Framework versions
74
 
75
  - PEFT 0.14.0
76
- - Transformers 4.51.1
77
  - Pytorch 2.3.1+cu121
78
  - Datasets 3.5.0
79
- - Tokenizers 0.21.1
 
7
  - lora
8
  - generated_from_trainer
9
  model-index:
10
+ - name: train_residue_list_lr_5e-4_5_epochs
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # train_residue_list_lr_5e-4_5_epochs
18
 
19
  This model is a fine-tuned version of [GreatCaptainNemo/ProLLaMA_Stage_1](https://huggingface.co/GreatCaptainNemo/ProLLaMA_Stage_1) on the adpr_train dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5254
22
+ - Num Input Tokens Seen: 13416448
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 0.0005
42
  - train_batch_size: 16
43
  - eval_batch_size: 16
44
  - seed: 42
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen |
55
  |:-------------:|:------:|:----:|:---------------:|:-----------------:|
56
+ | 2.5191 | 0.4561 | 100 | 2.8151 | 1230464 |
57
+ | 0.7346 | 0.9122 | 200 | 0.6728 | 2455680 |
58
+ | 1.9608 | 1.3649 | 300 | 1.7802 | 3673744 |
59
+ | 3.2759 | 1.8210 | 400 | 1.0581 | 4901648 |
60
+ | 0.5697 | 2.2737 | 500 | 0.5570 | 6120272 |
61
+ | 0.5411 | 2.7298 | 600 | 0.5424 | 7348944 |
62
+ | 0.5415 | 3.1824 | 700 | 0.5388 | 8570128 |
63
+ | 0.5477 | 3.6385 | 800 | 0.5335 | 9801744 |
64
+ | 0.5305 | 4.0912 | 900 | 0.5296 | 11019520 |
65
+ | 0.5281 | 4.5473 | 1000 | 0.5262 | 12247168 |
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
69
 
70
  - PEFT 0.14.0
71
+ - Transformers 4.48.3
72
  - Pytorch 2.3.1+cu121
73
  - Datasets 3.5.0
74
+ - Tokenizers 0.21.0
adapter_config.json CHANGED
@@ -24,12 +24,12 @@
24
  "revision": null,
25
  "target_modules": [
26
  "k_proj",
27
- "gate_proj",
28
  "v_proj",
 
29
  "o_proj",
 
30
  "q_proj",
31
- "down_proj",
32
- "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
24
  "revision": null,
25
  "target_modules": [
26
  "k_proj",
 
27
  "v_proj",
28
+ "gate_proj",
29
  "o_proj",
30
+ "up_proj",
31
  "q_proj",
32
+ "down_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66eb45d88a880716d3f14ec5b62744622e04f5cff153b98ebd4c0ad10cc5b59e
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b1cff0d311ec71991a98b2179ddea7adbb3fb733f9272b15e6da197193517a
3
  size 639691872
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.986698911729142,
3
- "eval_loss": 0.243755504488945,
4
- "eval_runtime": 46.4843,
5
- "eval_samples_per_second": 94.871,
6
- "eval_steps_per_second": 5.937,
7
- "num_input_tokens_seen": 18724952,
8
- "total_flos": 7.602987521131807e+17,
9
- "train_loss": 0.2932651937200177,
10
- "train_runtime": 7879.3287,
11
- "train_samples_per_second": 25.18,
12
- "train_steps_per_second": 0.197
13
  }
 
1
  {
2
+ "epoch": 4.980615735461802,
3
+ "eval_loss": 0.5253703594207764,
4
+ "eval_runtime": 33.4842,
5
+ "eval_samples_per_second": 93.118,
6
+ "eval_steps_per_second": 5.824,
7
+ "num_input_tokens_seen": 13416448,
8
+ "total_flos": 5.447548635740897e+17,
9
+ "train_loss": 1.1898179768427322,
10
+ "train_runtime": 5468.9169,
11
+ "train_samples_per_second": 25.652,
12
+ "train_steps_per_second": 0.2
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.986698911729142,
3
- "eval_loss": 0.243755504488945,
4
- "eval_runtime": 46.4843,
5
- "eval_samples_per_second": 94.871,
6
- "eval_steps_per_second": 5.937,
7
- "num_input_tokens_seen": 18724952
8
  }
 
1
  {
2
+ "epoch": 4.980615735461802,
3
+ "eval_loss": 0.5253703594207764,
4
+ "eval_runtime": 33.4842,
5
+ "eval_samples_per_second": 93.118,
6
+ "eval_steps_per_second": 5.824,
7
+ "num_input_tokens_seen": 13416448
8
  }
model_eval_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
trainer_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b58080bbcdbfcc2aa89fe77dfea14ec621f4df578e337948cd841e3cd66c2b
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4f652dcb7d7f75bc908f23f8eba6c917ad7ac15d71310e1d8b7b5074bcba3c
3
  size 5688
training_eval_loss.png CHANGED

Git LFS Details

  • SHA256: 0b9aa5ea6655d49b550bcfc8772e3a6640e57f80abcd130846091cec4f015c26
  • Pointer size: 130 Bytes
  • Size of remote file: 39.5 kB

Git LFS Details

  • SHA256: 01a797f898b8e776c42056671340c8981bbf0c1b8147f2f49e040ac3604e728a
  • Pointer size: 130 Bytes
  • Size of remote file: 36.9 kB
training_loss.png CHANGED

Git LFS Details

  • SHA256: bd9a9a4c4588967843dfe4f50ed0b44d8437d4e0489e7dbea40f7b29caa3943f
  • Pointer size: 130 Bytes
  • Size of remote file: 30.3 kB

Git LFS Details

  • SHA256: 3c014d11612a2433f1daf1588fca194d7b48f28ebba924747c4168b989bcd923
  • Pointer size: 130 Bytes
  • Size of remote file: 39.3 kB