yifan-Eva committed on
Commit
f6f67f7
·
verified ·
1 Parent(s): c995102

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/README.md +10 -0
  2. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/adapter_config.json +23 -0
  3. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/adapter_model.bin +3 -0
  4. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/special_tokens_map.json +24 -0
  5. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer.json +0 -0
  6. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer.model +3 -0
  7. modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer_config.json +39 -0
  8. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/README.md +10 -0
  9. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/adapter_config.json +23 -0
  10. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/adapter_model.bin +3 -0
  11. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/optimizer.pt +3 -0
  12. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_0.pth +3 -0
  13. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_1.pth +3 -0
  14. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_2.pth +3 -0
  15. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_3.pth +3 -0
  16. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/scheduler.pt +3 -0
  17. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/special_tokens_map.json +24 -0
  18. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer.json +0 -0
  19. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer.model +3 -0
  20. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer_config.json +39 -0
  21. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/trainer_state.json +0 -0
  22. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/training_args.bin +3 -0
  23. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/README.md +10 -0
  24. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/adapter_config.json +23 -0
  25. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/adapter_model.bin +3 -0
  26. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/optimizer.pt +3 -0
  27. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_0.pth +3 -0
  28. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_1.pth +3 -0
  29. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_2.pth +3 -0
  30. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_3.pth +3 -0
  31. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/scheduler.pt +3 -0
  32. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/special_tokens_map.json +24 -0
  33. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer.json +0 -0
  34. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer.model +3 -0
  35. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer_config.json +39 -0
  36. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/trainer_state.json +0 -0
  37. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/training_args.bin +3 -0
  38. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/README.md +10 -0
  39. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/adapter_config.json +23 -0
  40. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/adapter_model.bin +3 -0
  41. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/optimizer.pt +3 -0
  42. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_0.pth +3 -0
  43. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_1.pth +3 -0
  44. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_2.pth +3 -0
  45. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_3.pth +3 -0
  46. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/scheduler.pt +3 -0
  47. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/special_tokens_map.json +24 -0
  48. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer.json +0 -0
  49. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer.model +3 -0
  50. modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer_config.json +39 -0
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+ - PEFT 0.5.0
9
+
10
+ - PEFT 0.5.0
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/adapter_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/shared/nas/data/m1/xuejunz2/model_weights/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 1,
11
+ "lora_dropout": 0.0,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "k_proj",
19
+ "v_proj",
20
+ "o_proj"
21
+ ],
22
+ "task_type": "CAUSAL_LM"
23
+ }
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc5b7ab04df31362e7f302517937bd743edf818e25c08918e31aa43312b88a7
3
+ size 268528394
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
modpo/use_reward/0.5helpful_0.5harmless/best_checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "</s>",
31
+ "legacy": false,
32
+ "model_max_length": 1000000000000000019884624838656,
33
+ "pad_token": "</s>",
34
+ "padding_side": "right",
35
+ "sp_model_kwargs": {},
36
+ "tokenizer_class": "LlamaTokenizer",
37
+ "unk_token": "<unk>",
38
+ "use_default_system_prompt": true
39
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+ - PEFT 0.5.0
9
+
10
+ - PEFT 0.5.0
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/adapter_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/shared/nas/data/m1/xuejunz2/model_weights/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 1,
11
+ "lora_dropout": 0.0,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "k_proj",
19
+ "v_proj",
20
+ "o_proj"
21
+ ],
22
+ "task_type": "CAUSAL_LM"
23
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc5b7ab04df31362e7f302517937bd743edf818e25c08918e31aa43312b88a7
3
+ size 268528394
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c8af7e4d0f6f25c258d031819f09bef5225b30cd130a31a2e1885210ee8bd2
3
+ size 537086714
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e812528c12db18ea461d9e3bd3cc22ea90b0101393312170b1e2fc74c940bd81
3
+ size 14960
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37194a6d48612e1a46a2d5d317ead97c70d9fc4569b0118fcd5f84c3dc9daa5a
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17c179483659a784aa1ace2427daff48c556a6bcc3c330e6f3274e4dc95e4b49
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b56857c9b117629f35af2c3d64f522d33a9d8aa94faa81ec6956380a895118c4
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbed1d512c580bc3b1b5698c62fe6764e95e636085f70208357c5bdfe1fb3007
3
+ size 1064
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "</s>",
31
+ "legacy": false,
32
+ "model_max_length": 1000000000000000019884624838656,
33
+ "pad_token": "</s>",
34
+ "padding_side": "right",
35
+ "sp_model_kwargs": {},
36
+ "tokenizer_class": "LlamaTokenizer",
37
+ "unk_token": "<unk>",
38
+ "use_default_system_prompt": true
39
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-12000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a736e0c844803a29aa7b0154d1456168357c89757b27478f38bdb70d0921c8
3
+ size 4600
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+ - PEFT 0.5.0
9
+
10
+ - PEFT 0.5.0
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/adapter_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/shared/nas/data/m1/xuejunz2/model_weights/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 1,
11
+ "lora_dropout": 0.0,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "k_proj",
19
+ "v_proj",
20
+ "o_proj"
21
+ ],
22
+ "task_type": "CAUSAL_LM"
23
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f3432da6ac27bb56b1c36d242bd96341b47e56e12f038ed1008798f09cef9f
3
+ size 268528394
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5896c0f12d577a738a4be00c1faefe8c0d722ea3372a94ef3a0e4666d1330f9
3
+ size 537086714
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f69b5414e85c17b526e8c65193f06ef8d6195a8c8383efe1d2f9d044824583
3
+ size 14960
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d1438e98cc9c53a6852464635ce62e9788e61eb3646b73e33813f487c4b6ae
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4388add9cec90932f8ff0100d27a0574d98e1bad52ff89d44e31967d2b4fbfde
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a705d6dfaae4f2c1b4b2be6b25a6eb521ffae6fcba21cc1531e97b60037ed079
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5456ef20d8707787136301aa23039ee4cf82a6cae6197cab80f2a73d53d2163a
3
+ size 1064
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "</s>",
31
+ "legacy": false,
32
+ "model_max_length": 1000000000000000019884624838656,
33
+ "pad_token": "</s>",
34
+ "padding_side": "right",
35
+ "sp_model_kwargs": {},
36
+ "tokenizer_class": "LlamaTokenizer",
37
+ "unk_token": "<unk>",
38
+ "use_default_system_prompt": true
39
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-6000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a736e0c844803a29aa7b0154d1456168357c89757b27478f38bdb70d0921c8
3
+ size 4600
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+ - PEFT 0.5.0
9
+
10
+ - PEFT 0.5.0
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/adapter_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/shared/nas/data/m1/xuejunz2/model_weights/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 1,
11
+ "lora_dropout": 0.0,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "k_proj",
19
+ "v_proj",
20
+ "o_proj"
21
+ ],
22
+ "task_type": "CAUSAL_LM"
23
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:193a9e6cc8762cd885f0c548f6543068ce28ff85b3b32cd9b958a5497fe981aa
3
+ size 268528394
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08274a60c84f0390ff483f25b177bcdcc9a6c4c5b51d2498c5e05bc9293107e
3
+ size 537086714
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331df8a1fce70deea17ec1af86cf0dc22ecc098b0c4860cf6b22d89e76c71a14
3
+ size 14960
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e07ace389d24bc1307b74f42a1e7b8f0117b0db853e2df64ff3f15cb92916a2
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6a990f346d7014dffb28fa2bc7d3b890bd3c53712503fce3656da48d3d6e50
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95f356ca38179b05993f55daece0223e96fa10b9a1b9ea2102a739211333f63
3
+ size 15024
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b2347f69fca9ef2a9593be88870533f267e2835330293340da3b589c9a7b97
3
+ size 1064
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
modpo/use_reward/0.5helpful_0.5harmless/checkpoint-9000/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "</s>",
31
+ "legacy": false,
32
+ "model_max_length": 1000000000000000019884624838656,
33
+ "pad_token": "</s>",
34
+ "padding_side": "right",
35
+ "sp_model_kwargs": {},
36
+ "tokenizer_class": "LlamaTokenizer",
37
+ "unk_token": "<unk>",
38
+ "use_default_system_prompt": true
39
+ }