rogerscuall committed on
Commit
bf50da8
·
verified ·
1 Parent(s): 4f9f36c

End of training

Browse files
README.md CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
4
  model_name: phi3-mini-yoda-adapter
5
  tags:
6
  - generated_from_trainer
7
- - trl
8
  - sft
 
9
  licence: license
10
  ---
11
 
@@ -34,11 +34,11 @@ This model was trained with SFT.
34
 
35
  ### Framework versions
36
 
37
- - TRL: 0.14.0
38
- - Transformers: 4.48.2
39
- - Pytorch: 2.6.0
40
- - Datasets: 3.2.0
41
- - Tokenizers: 0.21.0
42
 
43
  ## Citations
44
 
@@ -49,7 +49,7 @@ Cite TRL as:
49
  ```bibtex
50
  @misc{vonwerra2022trl,
51
  title = {{TRL: Transformer Reinforcement Learning}},
52
- author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
  year = 2020,
54
  journal = {GitHub repository},
55
  publisher = {GitHub},
 
4
  model_name: phi3-mini-yoda-adapter
5
  tags:
6
  - generated_from_trainer
 
7
  - sft
8
+ - trl
9
  licence: license
10
  ---
11
 
 
34
 
35
  ### Framework versions
36
 
37
+ - TRL: 0.19.1
38
+ - Transformers: 4.53.1
39
+ - Pytorch: 2.6.0+cu124
40
+ - Datasets: 4.0.0
41
+ - Tokenizers: 0.21.2
42
 
43
  ## Citations
44
 
 
49
  ```bibtex
50
  @misc{vonwerra2022trl,
51
  title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
53
  year = 2020,
54
  journal = {GitHub repository},
55
  publisher = {GitHub},
adapter_config.json CHANGED
@@ -3,6 +3,7 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
@@ -19,16 +20,19 @@
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": null,
21
  "peft_type": "LORA",
 
22
  "r": 8,
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "qkv_proj",
28
  "o_proj",
29
- "gate_up_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
 
32
  "use_dora": false,
 
33
  "use_rslora": false
34
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
5
  "bias": "none",
6
+ "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
 
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
  "r": 8,
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
28
  "qkv_proj",
29
  "o_proj",
30
+ "gate_up_proj",
31
+ "down_proj"
32
  ],
33
  "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
  "use_dora": false,
36
+ "use_qalora": false,
37
  "use_rslora": false
38
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a564010b3a53a0d28e23c4d80c1e83979d88658f9a0e05141e41a6b34cdba912
3
  size 50365768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:221950aeb38a304645558d203cc132201d1d04097a0f943681a39a0926ebf2e3
3
  size 50365768
chat_template.jinja ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>
2
+ ' + message['content'] + '<|end|>
3
+ '}}{% elif message['role'] == 'user' %}{{'<|user|>
4
+ ' + message['content'] + '<|end|>
5
+ '}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
6
+ ' + message['content'] + '<|end|>
7
+ '}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
8
+ ' }}{% else %}{{ eos_token }}{% endif %}
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "<|endoftext|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<unk>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -117,13 +117,12 @@
117
  }
118
  },
119
  "bos_token": "<s>",
120
- "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "extra_special_tokens": {},
124
  "legacy": false,
125
  "model_max_length": 4096,
126
- "pad_token": "<|endoftext|>",
127
  "padding_side": "left",
128
  "sp_model_kwargs": {},
129
  "tokenizer_class": "LlamaTokenizer",
 
117
  }
118
  },
119
  "bos_token": "<s>",
 
120
  "clean_up_tokenization_spaces": false,
121
  "eos_token": "<|endoftext|>",
122
  "extra_special_tokens": {},
123
  "legacy": false,
124
  "model_max_length": 4096,
125
+ "pad_token": "<unk>",
126
  "padding_side": "left",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd65c078e6073e339eedbcc6fa7fa8d015ab496efcba3459bdc4c6e58e9e7ac8
3
- size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f58672029b624a6f0a550aaa55435f50de5afe90321f6a403caf6af205ae77
3
+ size 5688