Evan-Lin committed on
Commit
49b2d74
·
verified ·
1 Parent(s): f668a8b

Evan-Lin/dpo-llama-chat

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dpo-dataset.json filter=lfs diff=lfs merge=lfs -text
37
+ gigaspeech_prompt_xs_large_v3_wer.json filter=lfs diff=lfs merge=lfs -text
38
+ sft-dataset.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ tags:
4
+ - trl
5
+ - dpo
6
+ - generated_from_trainer
7
+ base_model: meta-llama/Llama-2-7b-hf
8
+ model-index:
9
+ - name: results
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # results
17
+
18
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unspecified dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.0483
21
+ - Rewards/chosen: 0.8443
22
+ - Rewards/rejected: -4.9894
23
+ - Rewards/accuracies: 0.9864
24
+ - Rewards/margins: 5.8337
25
+ - Logps/rejected: -163.0178
26
+ - Logps/chosen: -85.8088
27
+ - Logits/rejected: -1.0144
28
+ - Logits/chosen: -0.8703
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 0.0005
48
+ - train_batch_size: 1
49
+ - eval_batch_size: 1
50
+ - seed: 42
51
+ - gradient_accumulation_steps: 64
52
+ - total_train_batch_size: 64
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: cosine
55
+ - lr_scheduler_warmup_steps: 100
56
+ - training_steps: 1000
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.5635 | 0.24 | 100 | 0.5460 | 0.2168 | -0.4663 | 0.7367 | 0.6831 | -117.7869 | -92.0844 | -1.3150 | -1.2411 |
64
+ | 0.3836 | 0.47 | 200 | 0.3582 | 0.1507 | -1.4599 | 0.8494 | 1.6106 | -127.7231 | -92.7453 | -0.6842 | -0.5917 |
65
+ | 0.2525 | 0.71 | 300 | 0.2509 | 0.6325 | -1.7217 | 0.9095 | 2.3542 | -130.3404 | -87.9269 | -0.7855 | -0.6860 |
66
+ | 0.1625 | 0.94 | 400 | 0.1711 | 0.6613 | -2.8054 | 0.9357 | 3.4667 | -141.1781 | -87.6390 | -0.7853 | -0.6836 |
67
+ | 0.0695 | 1.18 | 500 | 0.1215 | 0.6443 | -3.7903 | 0.9589 | 4.4347 | -151.0267 | -87.8085 | -0.8915 | -0.7635 |
68
+ | 0.0448 | 1.42 | 600 | 0.0905 | 1.0284 | -4.1415 | 0.9698 | 5.1699 | -154.5387 | -83.9677 | -0.9632 | -0.8182 |
69
+ | 0.0515 | 1.65 | 700 | 0.0760 | 1.1233 | -3.6423 | 0.9758 | 4.7656 | -149.5469 | -83.0189 | -0.9748 | -0.8504 |
70
+ | 0.0396 | 1.89 | 800 | 0.0542 | 0.7363 | -4.9101 | 0.9864 | 5.6464 | -162.2247 | -86.8886 | -1.0377 | -0.8963 |
71
+ | 0.0099 | 2.13 | 900 | 0.0486 | 0.8344 | -4.9605 | 0.9864 | 5.7949 | -162.7287 | -85.9078 | -1.0199 | -0.8760 |
72
+ | 0.0107 | 2.36 | 1000 | 0.0483 | 0.8443 | -4.9894 | 0.9864 | 5.8337 | -163.0178 | -85.8088 | -1.0144 | -0.8703 |
73
+
74
+
75
+ ### Framework versions
76
+
77
+ - PEFT 0.7.1
78
+ - Transformers 4.37.0.dev0
79
+ - PyTorch 2.1.2+cu121
80
+ - Datasets 2.15.0
81
+ - Tokenizers 0.15.0
adapter_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 16,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 8,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "k_proj",
23
+ "fc_in",
24
+ "out_proj",
25
+ "q_proj",
26
+ "v_proj",
27
+ "wte",
28
+ "fc_out"
29
+ ],
30
+ "task_type": "CAUSAL_LM"
31
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88de8b810a4a78a446194be966750655c96d5449e79ef950203c3c92f85b2ed5
3
+ size 25191360
dpo-dataset.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a6d6f23c6bcbca11a5cef1a4970fc5012796582fd08457532e1db66980e9df
3
+ size 18308271
gigaspeech-no-previous-xs.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech-prompts-pool-gt.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech-prompts-pool-wer-gt.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech-train-default.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech-train-no-previous.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech_no-previous_xs.json ADDED
The diff for this file is too large to render. See raw diff
 
gigaspeech_prompt_xs_large_v3_wer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a75d9ee3250fa61c36e2211203a7c31dbb14e18eeeb0647b7f6a8b9341d5e9
3
+ size 26504717
merge_json.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Merge sharded JSON object files into a single JSON object file.

Each shard listed in ``files`` must contain one top-level JSON object. Shards
are read in list order and merged key-by-key into one object, which is then
written to ``output_file``. When a key appears in more than one shard, the
value from the later shard wins and a warning is printed to stderr so the
overwrite does not go unnoticed.

Raises:
    FileNotFoundError: if a listed shard does not exist.
    json.JSONDecodeError: if a shard is not valid JSON.
    TypeError: if a shard's top-level value is not a JSON object.
"""
import os
import json
import sys

# Input shards, merged in this order (later shards win on duplicate keys).
files = [
    "gigaspeech_no-previous_test_0-5000.json",
    "gigaspeech_no-previous_test_5000-10000.json",
    "gigaspeech_no-previous_test_10000-15000.json",
    "gigaspeech_no-previous_test_15000-20000.json",
]
output_file = "gigaspeech_no-previous_test.json"

final_output = {}
for file in files:
    # JSON interchange is UTF-8; do not depend on the platform's locale default.
    with open(file, encoding="utf-8") as f:
        data = json.load(f)

    # dict.update() would also accept e.g. a list of pairs, so check explicitly
    # and fail with a message that names the offending shard.
    if not isinstance(data, dict):
        raise TypeError(
            f"{file}: expected a top-level JSON object, got {type(data).__name__}"
        )

    # Surface key collisions instead of silently dropping the earlier value.
    overlap = final_output.keys() & data.keys()
    if overlap:
        sample = ", ".join(sorted(overlap)[:5])
        print(
            f"warning: {file} overwrites {len(overlap)} existing key(s), e.g. {sample}",
            file=sys.stderr,
        )

    final_output.update(data)

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_output, f)
merge_list.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Concatenate sharded JSON array files into a single JSON array file.

Each shard listed in ``files`` must contain one top-level JSON array. Shards
are read in list order and their elements appended, preserving order, to one
list that is then written to ``output_file``.

Raises:
    FileNotFoundError: if a listed shard does not exist.
    json.JSONDecodeError: if a shard is not valid JSON.
    TypeError: if a shard's top-level value is not a JSON array.
"""
import os
import json

# Input shards, concatenated in this order.
files = [
    "gigaspeech_prompt_xs_large_v3_wer_0-20000.json",
    "gigaspeech_prompt_xs_large_v3_wer_20000-99999999.json",
]
output_file = "gigaspeech_prompt_xs_large_v3_wer.json"

final_output = []
for file in files:
    # JSON interchange is UTF-8; do not depend on the platform's locale default.
    with open(file, encoding="utf-8") as f:
        data = json.load(f)

    # list.extend() accepts any iterable: a dict would contribute only its keys
    # and a string its characters. Reject anything that is not an array.
    if not isinstance(data, list):
        raise TypeError(
            f"{file}: expected a top-level JSON array, got {type(data).__name__}"
        )

    final_output.extend(data)

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_output, f)
ntu-course-default.json ADDED
The diff for this file is too large to render. See raw diff
 
ntu-course-no-previous.json ADDED
The diff for this file is too large to render. See raw diff
 
ntu-course-sum-1.json ADDED
The diff for this file is too large to render. See raw diff
 
ntu-course-sum-2.json ADDED
The diff for this file is too large to render. See raw diff
 
sft-dataset.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a6d6f23c6bcbca11a5cef1a4970fc5012796582fd08457532e1db66980e9df
3
+ size 18308271
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tie-default.json ADDED
The diff for this file is too large to render. See raw diff
 
tie-keyword-1.json ADDED
The diff for this file is too large to render. See raw diff
 
tie-no-previous.json ADDED
The diff for this file is too large to render. See raw diff
 
tie-sum-1.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": false,
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "</s>",
36
+ "padding_side": "right",
37
+ "sp_model_kwargs": {},
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:949e32986ef53e33661e230b0aa8cd28cec4469b31448b29ebb37de8b6c6651f
3
+ size 4664