Kukedlc committed on
Commit
30d8cc3
·
verified ·
1 Parent(s): 1701718

Add adapter_phase1/tokenizer_config.json (ckpt-506 final phase 1)

Browse files
Files changed (1) hide show
  1. adapter_phase1/tokenizer_config.json +298 -0
adapter_phase1/tokenizer_config.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": false,
13
+ "model_max_length": 262144,
14
+ "model_specific_special_tokens": {
15
+ "audio_bos_token": "<|audio_start|>",
16
+ "audio_eos_token": "<|audio_end|>",
17
+ "audio_token": "<|audio_pad|>",
18
+ "image_token": "<|image_pad|>",
19
+ "video_token": "<|video_pad|>",
20
+ "vision_bos_token": "<|vision_start|>",
21
+ "vision_eos_token": "<|vision_end|>"
22
+ },
23
+ "pad_token": "<|endoftext|>",
24
+ "padding_side": "right",
25
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "TokenizersBackend",
28
+ "unk_token": null,
29
+ "video_token": "<|video_pad|>",
30
+ "vision_bos_token": "<|vision_start|>",
31
+ "vision_eos_token": "<|vision_end|>",
32
+ "added_tokens_decoder": {
33
+ "248044": {
34
+ "content": "<|endoftext|>",
35
+ "single_word": false,
36
+ "lstrip": false,
37
+ "rstrip": false,
38
+ "normalized": false,
39
+ "special": true
40
+ },
41
+ "248045": {
42
+ "content": "<|im_start|>",
43
+ "single_word": false,
44
+ "lstrip": false,
45
+ "rstrip": false,
46
+ "normalized": false,
47
+ "special": true
48
+ },
49
+ "248046": {
50
+ "content": "<|im_end|>",
51
+ "single_word": false,
52
+ "lstrip": false,
53
+ "rstrip": false,
54
+ "normalized": false,
55
+ "special": true
56
+ },
57
+ "248047": {
58
+ "content": "<|object_ref_start|>",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
+ },
65
+ "248048": {
66
+ "content": "<|object_ref_end|>",
67
+ "single_word": false,
68
+ "lstrip": false,
69
+ "rstrip": false,
70
+ "normalized": false,
71
+ "special": true
72
+ },
73
+ "248049": {
74
+ "content": "<|box_start|>",
75
+ "single_word": false,
76
+ "lstrip": false,
77
+ "rstrip": false,
78
+ "normalized": false,
79
+ "special": true
80
+ },
81
+ "248050": {
82
+ "content": "<|box_end|>",
83
+ "single_word": false,
84
+ "lstrip": false,
85
+ "rstrip": false,
86
+ "normalized": false,
87
+ "special": true
88
+ },
89
+ "248051": {
90
+ "content": "<|quad_start|>",
91
+ "single_word": false,
92
+ "lstrip": false,
93
+ "rstrip": false,
94
+ "normalized": false,
95
+ "special": true
96
+ },
97
+ "248052": {
98
+ "content": "<|quad_end|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ "248053": {
106
+ "content": "<|vision_start|>",
107
+ "single_word": false,
108
+ "lstrip": false,
109
+ "rstrip": false,
110
+ "normalized": false,
111
+ "special": true
112
+ },
113
+ "248054": {
114
+ "content": "<|vision_end|>",
115
+ "single_word": false,
116
+ "lstrip": false,
117
+ "rstrip": false,
118
+ "normalized": false,
119
+ "special": true
120
+ },
121
+ "248055": {
122
+ "content": "<|vision_pad|>",
123
+ "single_word": false,
124
+ "lstrip": false,
125
+ "rstrip": false,
126
+ "normalized": false,
127
+ "special": true
128
+ },
129
+ "248056": {
130
+ "content": "<|image_pad|>",
131
+ "single_word": false,
132
+ "lstrip": false,
133
+ "rstrip": false,
134
+ "normalized": false,
135
+ "special": true
136
+ },
137
+ "248057": {
138
+ "content": "<|video_pad|>",
139
+ "single_word": false,
140
+ "lstrip": false,
141
+ "rstrip": false,
142
+ "normalized": false,
143
+ "special": true
144
+ },
145
+ "248058": {
146
+ "content": "<tool_call>",
147
+ "single_word": false,
148
+ "lstrip": false,
149
+ "rstrip": false,
150
+ "normalized": false,
151
+ "special": false
152
+ },
153
+ "248059": {
154
+ "content": "</tool_call>",
155
+ "single_word": false,
156
+ "lstrip": false,
157
+ "rstrip": false,
158
+ "normalized": false,
159
+ "special": false
160
+ },
161
+ "248060": {
162
+ "content": "<|fim_prefix|>",
163
+ "single_word": false,
164
+ "lstrip": false,
165
+ "rstrip": false,
166
+ "normalized": false,
167
+ "special": false
168
+ },
169
+ "248061": {
170
+ "content": "<|fim_middle|>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": false
176
+ },
177
+ "248062": {
178
+ "content": "<|fim_suffix|>",
179
+ "single_word": false,
180
+ "lstrip": false,
181
+ "rstrip": false,
182
+ "normalized": false,
183
+ "special": false
184
+ },
185
+ "248063": {
186
+ "content": "<|fim_pad|>",
187
+ "single_word": false,
188
+ "lstrip": false,
189
+ "rstrip": false,
190
+ "normalized": false,
191
+ "special": false
192
+ },
193
+ "248064": {
194
+ "content": "<|repo_name|>",
195
+ "single_word": false,
196
+ "lstrip": false,
197
+ "rstrip": false,
198
+ "normalized": false,
199
+ "special": false
200
+ },
201
+ "248065": {
202
+ "content": "<|file_sep|>",
203
+ "single_word": false,
204
+ "lstrip": false,
205
+ "rstrip": false,
206
+ "normalized": false,
207
+ "special": false
208
+ },
209
+ "248066": {
210
+ "content": "<tool_response>",
211
+ "single_word": false,
212
+ "lstrip": false,
213
+ "rstrip": false,
214
+ "normalized": false,
215
+ "special": false
216
+ },
217
+ "248067": {
218
+ "content": "</tool_response>",
219
+ "single_word": false,
220
+ "lstrip": false,
221
+ "rstrip": false,
222
+ "normalized": false,
223
+ "special": false
224
+ },
225
+ "248068": {
226
+ "content": "<think>",
227
+ "single_word": false,
228
+ "lstrip": false,
229
+ "rstrip": false,
230
+ "normalized": false,
231
+ "special": false
232
+ },
233
+ "248069": {
234
+ "content": "</think>",
235
+ "single_word": false,
236
+ "lstrip": false,
237
+ "rstrip": false,
238
+ "normalized": false,
239
+ "special": false
240
+ },
241
+ "248070": {
242
+ "content": "<|audio_start|>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ "248071": {
250
+ "content": "<|audio_end|>",
251
+ "single_word": false,
252
+ "lstrip": false,
253
+ "rstrip": false,
254
+ "normalized": false,
255
+ "special": true
256
+ },
257
+ "248072": {
258
+ "content": "<tts_pad>",
259
+ "single_word": false,
260
+ "lstrip": false,
261
+ "rstrip": false,
262
+ "normalized": false,
263
+ "special": true
264
+ },
265
+ "248073": {
266
+ "content": "<tts_text_bos>",
267
+ "single_word": false,
268
+ "lstrip": false,
269
+ "rstrip": false,
270
+ "normalized": false,
271
+ "special": true
272
+ },
273
+ "248074": {
274
+ "content": "<tts_text_eod>",
275
+ "single_word": false,
276
+ "lstrip": false,
277
+ "rstrip": false,
278
+ "normalized": false,
279
+ "special": true
280
+ },
281
+ "248075": {
282
+ "content": "<tts_text_bos_single>",
283
+ "single_word": false,
284
+ "lstrip": false,
285
+ "rstrip": false,
286
+ "normalized": false,
287
+ "special": true
288
+ },
289
+ "248076": {
290
+ "content": "<|audio_pad|>",
291
+ "single_word": false,
292
+ "lstrip": false,
293
+ "rstrip": false,
294
+ "normalized": false,
295
+ "special": true
296
+ }
297
+ }
298
+ }