robertou2 committed on
Commit
71ff54a
·
verified ·
1 Parent(s): a0503bf

Upload folder using huggingface_hub

Browse files
added_tokens.json CHANGED
@@ -2,6 +2,7 @@
2
  "<|assistant|>": 32001,
3
  "<|endoftext|>": 32000,
4
  "<|end|>": 32007,
 
5
  "<|placeholder1|>": 32002,
6
  "<|placeholder2|>": 32003,
7
  "<|placeholder3|>": 32004,
 
2
  "<|assistant|>": 32001,
3
  "<|endoftext|>": 32000,
4
  "<|end|>": 32007,
5
+ "<|pad|>": 32011,
6
  "<|placeholder1|>": 32002,
7
  "<|placeholder2|>": 32003,
8
  "<|placeholder3|>": 32004,
special_tokens_map.json CHANGED
@@ -7,14 +7,14 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|endoftext|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|pad|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -98,7 +112,7 @@
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
- "rstrip": true,
102
  "normalized": false,
103
  "special": true
104
  },
@@ -128,6 +142,15 @@
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 4096,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 4096
12
+ },
13
+ "direction": "Left",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 32011,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<|pad|>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
112
  "content": "<|end|>",
113
  "single_word": false,
114
  "lstrip": false,
115
+ "rstrip": false,
116
  "normalized": false,
117
  "special": true
118
  },
 
142
  "rstrip": true,
143
  "normalized": false,
144
  "special": true
145
+ },
146
+ {
147
+ "id": 32011,
148
+ "content": "<|pad|>",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": false,
153
+ "special": true
154
  }
155
  ],
156
  "normalizer": {
tokenizer_config.json CHANGED
@@ -87,7 +87,7 @@
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
- "rstrip": true,
91
  "single_word": false,
92
  "special": true
93
  },
@@ -114,16 +114,24 @@
114
  "rstrip": true,
115
  "single_word": false,
116
  "special": true
 
 
 
 
 
 
 
 
117
  }
118
  },
119
  "bos_token": "<s>",
120
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
  "clean_up_tokenization_spaces": false,
122
- "eos_token": "<|endoftext|>",
123
  "extra_special_tokens": {},
124
  "legacy": false,
125
  "model_max_length": 4096,
126
- "pad_token": "<|endoftext|>",
127
  "padding_side": "left",
128
  "sp_model_kwargs": {},
129
  "tokenizer_class": "LlamaTokenizer",
 
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
+ "rstrip": false,
91
  "single_word": false,
92
  "special": true
93
  },
 
114
  "rstrip": true,
115
  "single_word": false,
116
  "special": true
117
+ },
118
+ "32011": {
119
+ "content": "<|pad|>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
  }
126
  },
127
  "bos_token": "<s>",
128
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
129
  "clean_up_tokenization_spaces": false,
130
+ "eos_token": "<|end|>",
131
  "extra_special_tokens": {},
132
  "legacy": false,
133
  "model_max_length": 4096,
134
+ "pad_token": "<|pad|>",
135
  "padding_side": "left",
136
  "sp_model_kwargs": {},
137
  "tokenizer_class": "LlamaTokenizer",