maikezu committed on
Commit
b16078e
·
verified ·
1 Parent(s): 56efb59

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +11 -456
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +1 -551
special_tokens_map.json CHANGED
@@ -1,460 +1,9 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "[SILENCE_PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "[UTTERANCE_PAD]",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "[WORD_PAD]",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "[BC]",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "[INTER]",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "[INTERRUPT1]",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "[INTERRUPT2]",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "[INTERRUPT3]",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "[INTERRUPT4]",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "[INTERRUPT5]",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "[INTERRUPT6]",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "[INTERRUPT7]",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- },
87
- {
88
- "content": "[INTERRUPT8]",
89
- "lstrip": false,
90
- "normalized": false,
91
- "rstrip": false,
92
- "single_word": false
93
- },
94
- {
95
- "content": "[INTERRUPT9]",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false
100
- },
101
- {
102
- "content": "[INTERRUPT10]",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false
107
- },
108
- {
109
- "content": "[INTERRUPT11]",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false
114
- },
115
- {
116
- "content": "[INTERRUPT12]",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false
121
- },
122
- {
123
- "content": "[INTERRUPT13]",
124
- "lstrip": false,
125
- "normalized": false,
126
- "rstrip": false,
127
- "single_word": false
128
- },
129
- {
130
- "content": "[INTERRUPT14]",
131
- "lstrip": false,
132
- "normalized": false,
133
- "rstrip": false,
134
- "single_word": false
135
- },
136
- {
137
- "content": "[INTERRUPT15]",
138
- "lstrip": false,
139
- "normalized": false,
140
- "rstrip": false,
141
- "single_word": false
142
- },
143
- {
144
- "content": "[INTERRUPT16]",
145
- "lstrip": false,
146
- "normalized": false,
147
- "rstrip": false,
148
- "single_word": false
149
- },
150
- {
151
- "content": "[INTERRUPT17]",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false
156
- },
157
- {
158
- "content": "[INTERRUPT18]",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false
163
- },
164
- {
165
- "content": "[INTERRUPT19]",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false
170
- },
171
- {
172
- "content": "[INTERRUPT20]",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false
177
- },
178
- {
179
- "content": "[BC1]",
180
- "lstrip": false,
181
- "normalized": false,
182
- "rstrip": false,
183
- "single_word": false
184
- },
185
- {
186
- "content": "[BC2]",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false
191
- },
192
- {
193
- "content": "[BC3]",
194
- "lstrip": false,
195
- "normalized": false,
196
- "rstrip": false,
197
- "single_word": false
198
- },
199
- {
200
- "content": "[BC4]",
201
- "lstrip": false,
202
- "normalized": false,
203
- "rstrip": false,
204
- "single_word": false
205
- },
206
- {
207
- "content": "[BC5]",
208
- "lstrip": false,
209
- "normalized": false,
210
- "rstrip": false,
211
- "single_word": false
212
- },
213
- {
214
- "content": "[BC6]",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false
219
- },
220
- {
221
- "content": "[BC7]",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false
226
- },
227
- {
228
- "content": "[BC8]",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false
233
- },
234
- {
235
- "content": "[BC9]",
236
- "lstrip": false,
237
- "normalized": false,
238
- "rstrip": false,
239
- "single_word": false
240
- },
241
- {
242
- "content": "[BC10]",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false
247
- },
248
- {
249
- "content": "[BC11]",
250
- "lstrip": false,
251
- "normalized": false,
252
- "rstrip": false,
253
- "single_word": false
254
- },
255
- {
256
- "content": "[BC12]",
257
- "lstrip": false,
258
- "normalized": false,
259
- "rstrip": false,
260
- "single_word": false
261
- },
262
- {
263
- "content": "[BC13]",
264
- "lstrip": false,
265
- "normalized": false,
266
- "rstrip": false,
267
- "single_word": false
268
- },
269
- {
270
- "content": "[BC14]",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false
275
- },
276
- {
277
- "content": "[BC15]",
278
- "lstrip": false,
279
- "normalized": false,
280
- "rstrip": false,
281
- "single_word": false
282
- },
283
- {
284
- "content": "[BC16]",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false
289
- },
290
- {
291
- "content": "[BC17]",
292
- "lstrip": false,
293
- "normalized": false,
294
- "rstrip": false,
295
- "single_word": false
296
- },
297
- {
298
- "content": "[BC18]",
299
- "lstrip": false,
300
- "normalized": false,
301
- "rstrip": false,
302
- "single_word": false
303
- },
304
- {
305
- "content": "[BC19]",
306
- "lstrip": false,
307
- "normalized": false,
308
- "rstrip": false,
309
- "single_word": false
310
- },
311
- {
312
- "content": "[BC20]",
313
- "lstrip": false,
314
- "normalized": false,
315
- "rstrip": false,
316
- "single_word": false
317
- },
318
- {
319
- "content": "[BC21]",
320
- "lstrip": false,
321
- "normalized": false,
322
- "rstrip": false,
323
- "single_word": false
324
- },
325
- {
326
- "content": "[BC22]",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false
331
- },
332
- {
333
- "content": "[BC23]",
334
- "lstrip": false,
335
- "normalized": false,
336
- "rstrip": false,
337
- "single_word": false
338
- },
339
- {
340
- "content": "[BC24]",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false
345
- },
346
- {
347
- "content": "[BC25]",
348
- "lstrip": false,
349
- "normalized": false,
350
- "rstrip": false,
351
- "single_word": false
352
- },
353
- {
354
- "content": "[BC26]",
355
- "lstrip": false,
356
- "normalized": false,
357
- "rstrip": false,
358
- "single_word": false
359
- },
360
- {
361
- "content": "[BC27]",
362
- "lstrip": false,
363
- "normalized": false,
364
- "rstrip": false,
365
- "single_word": false
366
- },
367
- {
368
- "content": "[BC28]",
369
- "lstrip": false,
370
- "normalized": false,
371
- "rstrip": false,
372
- "single_word": false
373
- },
374
- {
375
- "content": "[BC29]",
376
- "lstrip": false,
377
- "normalized": false,
378
- "rstrip": false,
379
- "single_word": false
380
- },
381
- {
382
- "content": "[BC30]",
383
- "lstrip": false,
384
- "normalized": false,
385
- "rstrip": false,
386
- "single_word": false
387
- },
388
- {
389
- "content": "[BC31]",
390
- "lstrip": false,
391
- "normalized": false,
392
- "rstrip": false,
393
- "single_word": false
394
- },
395
- {
396
- "content": "[BC32]",
397
- "lstrip": false,
398
- "normalized": false,
399
- "rstrip": false,
400
- "single_word": false
401
- },
402
- {
403
- "content": "[BC33]",
404
- "lstrip": false,
405
- "normalized": false,
406
- "rstrip": false,
407
- "single_word": false
408
- },
409
- {
410
- "content": "[BC34]",
411
- "lstrip": false,
412
- "normalized": false,
413
- "rstrip": false,
414
- "single_word": false
415
- },
416
- {
417
- "content": "[BC35]",
418
- "lstrip": false,
419
- "normalized": false,
420
- "rstrip": false,
421
- "single_word": false
422
- },
423
- {
424
- "content": "[BC36]",
425
- "lstrip": false,
426
- "normalized": false,
427
- "rstrip": false,
428
- "single_word": false
429
- },
430
- {
431
- "content": "[BC37]",
432
- "lstrip": false,
433
- "normalized": false,
434
- "rstrip": false,
435
- "single_word": false
436
- },
437
- {
438
- "content": "[BC38]",
439
- "lstrip": false,
440
- "normalized": false,
441
- "rstrip": false,
442
- "single_word": false
443
- },
444
- {
445
- "content": "[BC39]",
446
- "lstrip": false,
447
- "normalized": false,
448
- "rstrip": false,
449
- "single_word": false
450
- },
451
- {
452
- "content": "[BC40]",
453
- "lstrip": false,
454
- "normalized": false,
455
- "rstrip": false,
456
- "single_word": false
457
- }
458
  ],
459
  "bos_token": {
460
  "content": "<|begin_of_text|>",
@@ -470,5 +19,11 @@
470
  "rstrip": false,
471
  "single_word": false
472
  },
473
- "pad_token": "<|eot_id|>"
 
 
 
 
 
 
474
  }
 
1
  {
2
  "additional_special_tokens": [
3
+ "[SILENCE_PAD]",
4
+ "[UTTERANCE_PAD]",
5
+ "[WORD_PAD]",
6
+ "[BC]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  ],
8
  "bos_token": {
9
  "content": "<|begin_of_text|>",
 
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
+ "pad_token": {
23
+ "content": "<|eot_id|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ }
29
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0182b2221a86c9c9b563410d38160bbf9b1e2d8633027cb2a0695eceb991baf
3
- size 17222233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94ab28ded5e09feacfd3036e8038bef0e2f7fd23f6a30721eec01b1f8f5ae3ab
3
+ size 17210947
tokenizer_config.json CHANGED
@@ -2079,562 +2079,13 @@
2079
  "rstrip": false,
2080
  "single_word": false,
2081
  "special": true
2082
- },
2083
- "128260": {
2084
- "content": "[INTER]",
2085
- "lstrip": false,
2086
- "normalized": false,
2087
- "rstrip": false,
2088
- "single_word": false,
2089
- "special": true
2090
- },
2091
- "128261": {
2092
- "content": "[INTERRUPT1]",
2093
- "lstrip": false,
2094
- "normalized": false,
2095
- "rstrip": false,
2096
- "single_word": false,
2097
- "special": true
2098
- },
2099
- "128262": {
2100
- "content": "[INTERRUPT2]",
2101
- "lstrip": false,
2102
- "normalized": false,
2103
- "rstrip": false,
2104
- "single_word": false,
2105
- "special": true
2106
- },
2107
- "128263": {
2108
- "content": "[INTERRUPT3]",
2109
- "lstrip": false,
2110
- "normalized": false,
2111
- "rstrip": false,
2112
- "single_word": false,
2113
- "special": true
2114
- },
2115
- "128264": {
2116
- "content": "[INTERRUPT4]",
2117
- "lstrip": false,
2118
- "normalized": false,
2119
- "rstrip": false,
2120
- "single_word": false,
2121
- "special": true
2122
- },
2123
- "128265": {
2124
- "content": "[INTERRUPT5]",
2125
- "lstrip": false,
2126
- "normalized": false,
2127
- "rstrip": false,
2128
- "single_word": false,
2129
- "special": true
2130
- },
2131
- "128266": {
2132
- "content": "[INTERRUPT6]",
2133
- "lstrip": false,
2134
- "normalized": false,
2135
- "rstrip": false,
2136
- "single_word": false,
2137
- "special": true
2138
- },
2139
- "128267": {
2140
- "content": "[INTERRUPT7]",
2141
- "lstrip": false,
2142
- "normalized": false,
2143
- "rstrip": false,
2144
- "single_word": false,
2145
- "special": true
2146
- },
2147
- "128268": {
2148
- "content": "[INTERRUPT8]",
2149
- "lstrip": false,
2150
- "normalized": false,
2151
- "rstrip": false,
2152
- "single_word": false,
2153
- "special": true
2154
- },
2155
- "128269": {
2156
- "content": "[INTERRUPT9]",
2157
- "lstrip": false,
2158
- "normalized": false,
2159
- "rstrip": false,
2160
- "single_word": false,
2161
- "special": true
2162
- },
2163
- "128270": {
2164
- "content": "[INTERRUPT10]",
2165
- "lstrip": false,
2166
- "normalized": false,
2167
- "rstrip": false,
2168
- "single_word": false,
2169
- "special": true
2170
- },
2171
- "128271": {
2172
- "content": "[INTERRUPT11]",
2173
- "lstrip": false,
2174
- "normalized": false,
2175
- "rstrip": false,
2176
- "single_word": false,
2177
- "special": true
2178
- },
2179
- "128272": {
2180
- "content": "[INTERRUPT12]",
2181
- "lstrip": false,
2182
- "normalized": false,
2183
- "rstrip": false,
2184
- "single_word": false,
2185
- "special": true
2186
- },
2187
- "128273": {
2188
- "content": "[INTERRUPT13]",
2189
- "lstrip": false,
2190
- "normalized": false,
2191
- "rstrip": false,
2192
- "single_word": false,
2193
- "special": true
2194
- },
2195
- "128274": {
2196
- "content": "[INTERRUPT14]",
2197
- "lstrip": false,
2198
- "normalized": false,
2199
- "rstrip": false,
2200
- "single_word": false,
2201
- "special": true
2202
- },
2203
- "128275": {
2204
- "content": "[INTERRUPT15]",
2205
- "lstrip": false,
2206
- "normalized": false,
2207
- "rstrip": false,
2208
- "single_word": false,
2209
- "special": true
2210
- },
2211
- "128276": {
2212
- "content": "[INTERRUPT16]",
2213
- "lstrip": false,
2214
- "normalized": false,
2215
- "rstrip": false,
2216
- "single_word": false,
2217
- "special": true
2218
- },
2219
- "128277": {
2220
- "content": "[INTERRUPT17]",
2221
- "lstrip": false,
2222
- "normalized": false,
2223
- "rstrip": false,
2224
- "single_word": false,
2225
- "special": true
2226
- },
2227
- "128278": {
2228
- "content": "[INTERRUPT18]",
2229
- "lstrip": false,
2230
- "normalized": false,
2231
- "rstrip": false,
2232
- "single_word": false,
2233
- "special": true
2234
- },
2235
- "128279": {
2236
- "content": "[INTERRUPT19]",
2237
- "lstrip": false,
2238
- "normalized": false,
2239
- "rstrip": false,
2240
- "single_word": false,
2241
- "special": true
2242
- },
2243
- "128280": {
2244
- "content": "[INTERRUPT20]",
2245
- "lstrip": false,
2246
- "normalized": false,
2247
- "rstrip": false,
2248
- "single_word": false,
2249
- "special": true
2250
- },
2251
- "128281": {
2252
- "content": "[BC1]",
2253
- "lstrip": false,
2254
- "normalized": false,
2255
- "rstrip": false,
2256
- "single_word": false,
2257
- "special": true
2258
- },
2259
- "128282": {
2260
- "content": "[BC2]",
2261
- "lstrip": false,
2262
- "normalized": false,
2263
- "rstrip": false,
2264
- "single_word": false,
2265
- "special": true
2266
- },
2267
- "128283": {
2268
- "content": "[BC3]",
2269
- "lstrip": false,
2270
- "normalized": false,
2271
- "rstrip": false,
2272
- "single_word": false,
2273
- "special": true
2274
- },
2275
- "128284": {
2276
- "content": "[BC4]",
2277
- "lstrip": false,
2278
- "normalized": false,
2279
- "rstrip": false,
2280
- "single_word": false,
2281
- "special": true
2282
- },
2283
- "128285": {
2284
- "content": "[BC5]",
2285
- "lstrip": false,
2286
- "normalized": false,
2287
- "rstrip": false,
2288
- "single_word": false,
2289
- "special": true
2290
- },
2291
- "128286": {
2292
- "content": "[BC6]",
2293
- "lstrip": false,
2294
- "normalized": false,
2295
- "rstrip": false,
2296
- "single_word": false,
2297
- "special": true
2298
- },
2299
- "128287": {
2300
- "content": "[BC7]",
2301
- "lstrip": false,
2302
- "normalized": false,
2303
- "rstrip": false,
2304
- "single_word": false,
2305
- "special": true
2306
- },
2307
- "128288": {
2308
- "content": "[BC8]",
2309
- "lstrip": false,
2310
- "normalized": false,
2311
- "rstrip": false,
2312
- "single_word": false,
2313
- "special": true
2314
- },
2315
- "128289": {
2316
- "content": "[BC9]",
2317
- "lstrip": false,
2318
- "normalized": false,
2319
- "rstrip": false,
2320
- "single_word": false,
2321
- "special": true
2322
- },
2323
- "128290": {
2324
- "content": "[BC10]",
2325
- "lstrip": false,
2326
- "normalized": false,
2327
- "rstrip": false,
2328
- "single_word": false,
2329
- "special": true
2330
- },
2331
- "128291": {
2332
- "content": "[BC11]",
2333
- "lstrip": false,
2334
- "normalized": false,
2335
- "rstrip": false,
2336
- "single_word": false,
2337
- "special": true
2338
- },
2339
- "128292": {
2340
- "content": "[BC12]",
2341
- "lstrip": false,
2342
- "normalized": false,
2343
- "rstrip": false,
2344
- "single_word": false,
2345
- "special": true
2346
- },
2347
- "128293": {
2348
- "content": "[BC13]",
2349
- "lstrip": false,
2350
- "normalized": false,
2351
- "rstrip": false,
2352
- "single_word": false,
2353
- "special": true
2354
- },
2355
- "128294": {
2356
- "content": "[BC14]",
2357
- "lstrip": false,
2358
- "normalized": false,
2359
- "rstrip": false,
2360
- "single_word": false,
2361
- "special": true
2362
- },
2363
- "128295": {
2364
- "content": "[BC15]",
2365
- "lstrip": false,
2366
- "normalized": false,
2367
- "rstrip": false,
2368
- "single_word": false,
2369
- "special": true
2370
- },
2371
- "128296": {
2372
- "content": "[BC16]",
2373
- "lstrip": false,
2374
- "normalized": false,
2375
- "rstrip": false,
2376
- "single_word": false,
2377
- "special": true
2378
- },
2379
- "128297": {
2380
- "content": "[BC17]",
2381
- "lstrip": false,
2382
- "normalized": false,
2383
- "rstrip": false,
2384
- "single_word": false,
2385
- "special": true
2386
- },
2387
- "128298": {
2388
- "content": "[BC18]",
2389
- "lstrip": false,
2390
- "normalized": false,
2391
- "rstrip": false,
2392
- "single_word": false,
2393
- "special": true
2394
- },
2395
- "128299": {
2396
- "content": "[BC19]",
2397
- "lstrip": false,
2398
- "normalized": false,
2399
- "rstrip": false,
2400
- "single_word": false,
2401
- "special": true
2402
- },
2403
- "128300": {
2404
- "content": "[BC20]",
2405
- "lstrip": false,
2406
- "normalized": false,
2407
- "rstrip": false,
2408
- "single_word": false,
2409
- "special": true
2410
- },
2411
- "128301": {
2412
- "content": "[BC21]",
2413
- "lstrip": false,
2414
- "normalized": false,
2415
- "rstrip": false,
2416
- "single_word": false,
2417
- "special": true
2418
- },
2419
- "128302": {
2420
- "content": "[BC22]",
2421
- "lstrip": false,
2422
- "normalized": false,
2423
- "rstrip": false,
2424
- "single_word": false,
2425
- "special": true
2426
- },
2427
- "128303": {
2428
- "content": "[BC23]",
2429
- "lstrip": false,
2430
- "normalized": false,
2431
- "rstrip": false,
2432
- "single_word": false,
2433
- "special": true
2434
- },
2435
- "128304": {
2436
- "content": "[BC24]",
2437
- "lstrip": false,
2438
- "normalized": false,
2439
- "rstrip": false,
2440
- "single_word": false,
2441
- "special": true
2442
- },
2443
- "128305": {
2444
- "content": "[BC25]",
2445
- "lstrip": false,
2446
- "normalized": false,
2447
- "rstrip": false,
2448
- "single_word": false,
2449
- "special": true
2450
- },
2451
- "128306": {
2452
- "content": "[BC26]",
2453
- "lstrip": false,
2454
- "normalized": false,
2455
- "rstrip": false,
2456
- "single_word": false,
2457
- "special": true
2458
- },
2459
- "128307": {
2460
- "content": "[BC27]",
2461
- "lstrip": false,
2462
- "normalized": false,
2463
- "rstrip": false,
2464
- "single_word": false,
2465
- "special": true
2466
- },
2467
- "128308": {
2468
- "content": "[BC28]",
2469
- "lstrip": false,
2470
- "normalized": false,
2471
- "rstrip": false,
2472
- "single_word": false,
2473
- "special": true
2474
- },
2475
- "128309": {
2476
- "content": "[BC29]",
2477
- "lstrip": false,
2478
- "normalized": false,
2479
- "rstrip": false,
2480
- "single_word": false,
2481
- "special": true
2482
- },
2483
- "128310": {
2484
- "content": "[BC30]",
2485
- "lstrip": false,
2486
- "normalized": false,
2487
- "rstrip": false,
2488
- "single_word": false,
2489
- "special": true
2490
- },
2491
- "128311": {
2492
- "content": "[BC31]",
2493
- "lstrip": false,
2494
- "normalized": false,
2495
- "rstrip": false,
2496
- "single_word": false,
2497
- "special": true
2498
- },
2499
- "128312": {
2500
- "content": "[BC32]",
2501
- "lstrip": false,
2502
- "normalized": false,
2503
- "rstrip": false,
2504
- "single_word": false,
2505
- "special": true
2506
- },
2507
- "128313": {
2508
- "content": "[BC33]",
2509
- "lstrip": false,
2510
- "normalized": false,
2511
- "rstrip": false,
2512
- "single_word": false,
2513
- "special": true
2514
- },
2515
- "128314": {
2516
- "content": "[BC34]",
2517
- "lstrip": false,
2518
- "normalized": false,
2519
- "rstrip": false,
2520
- "single_word": false,
2521
- "special": true
2522
- },
2523
- "128315": {
2524
- "content": "[BC35]",
2525
- "lstrip": false,
2526
- "normalized": false,
2527
- "rstrip": false,
2528
- "single_word": false,
2529
- "special": true
2530
- },
2531
- "128316": {
2532
- "content": "[BC36]",
2533
- "lstrip": false,
2534
- "normalized": false,
2535
- "rstrip": false,
2536
- "single_word": false,
2537
- "special": true
2538
- },
2539
- "128317": {
2540
- "content": "[BC37]",
2541
- "lstrip": false,
2542
- "normalized": false,
2543
- "rstrip": false,
2544
- "single_word": false,
2545
- "special": true
2546
- },
2547
- "128318": {
2548
- "content": "[BC38]",
2549
- "lstrip": false,
2550
- "normalized": false,
2551
- "rstrip": false,
2552
- "single_word": false,
2553
- "special": true
2554
- },
2555
- "128319": {
2556
- "content": "[BC39]",
2557
- "lstrip": false,
2558
- "normalized": false,
2559
- "rstrip": false,
2560
- "single_word": false,
2561
- "special": true
2562
- },
2563
- "128320": {
2564
- "content": "[BC40]",
2565
- "lstrip": false,
2566
- "normalized": false,
2567
- "rstrip": false,
2568
- "single_word": false,
2569
- "special": true
2570
  }
2571
  },
2572
  "additional_special_tokens": [
2573
  "[SILENCE_PAD]",
2574
  "[UTTERANCE_PAD]",
2575
  "[WORD_PAD]",
2576
- "[BC]",
2577
- "[INTER]",
2578
- "[INTERRUPT1]",
2579
- "[INTERRUPT2]",
2580
- "[INTERRUPT3]",
2581
- "[INTERRUPT4]",
2582
- "[INTERRUPT5]",
2583
- "[INTERRUPT6]",
2584
- "[INTERRUPT7]",
2585
- "[INTERRUPT8]",
2586
- "[INTERRUPT9]",
2587
- "[INTERRUPT10]",
2588
- "[INTERRUPT11]",
2589
- "[INTERRUPT12]",
2590
- "[INTERRUPT13]",
2591
- "[INTERRUPT14]",
2592
- "[INTERRUPT15]",
2593
- "[INTERRUPT16]",
2594
- "[INTERRUPT17]",
2595
- "[INTERRUPT18]",
2596
- "[INTERRUPT19]",
2597
- "[INTERRUPT20]",
2598
- "[BC1]",
2599
- "[BC2]",
2600
- "[BC3]",
2601
- "[BC4]",
2602
- "[BC5]",
2603
- "[BC6]",
2604
- "[BC7]",
2605
- "[BC8]",
2606
- "[BC9]",
2607
- "[BC10]",
2608
- "[BC11]",
2609
- "[BC12]",
2610
- "[BC13]",
2611
- "[BC14]",
2612
- "[BC15]",
2613
- "[BC16]",
2614
- "[BC17]",
2615
- "[BC18]",
2616
- "[BC19]",
2617
- "[BC20]",
2618
- "[BC21]",
2619
- "[BC22]",
2620
- "[BC23]",
2621
- "[BC24]",
2622
- "[BC25]",
2623
- "[BC26]",
2624
- "[BC27]",
2625
- "[BC28]",
2626
- "[BC29]",
2627
- "[BC30]",
2628
- "[BC31]",
2629
- "[BC32]",
2630
- "[BC33]",
2631
- "[BC34]",
2632
- "[BC35]",
2633
- "[BC36]",
2634
- "[BC37]",
2635
- "[BC38]",
2636
- "[BC39]",
2637
- "[BC40]"
2638
  ],
2639
  "bos_token": "<|begin_of_text|>",
2640
  "clean_up_tokenization_spaces": true,
@@ -2651,7 +2102,6 @@
2651
  "pad_token": "<|eot_id|>",
2652
  "pad_token_type_id": 0,
2653
  "padding_side": "right",
2654
- "rust_remote_code": true,
2655
  "stride": 0,
2656
  "tokenizer_class": "PreTrainedTokenizerFast",
2657
  "truncation_side": "right",
 
2079
  "rstrip": false,
2080
  "single_word": false,
2081
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2082
  }
2083
  },
2084
  "additional_special_tokens": [
2085
  "[SILENCE_PAD]",
2086
  "[UTTERANCE_PAD]",
2087
  "[WORD_PAD]",
2088
+ "[BC]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2089
  ],
2090
  "bos_token": "<|begin_of_text|>",
2091
  "clean_up_tokenization_spaces": true,
 
2102
  "pad_token": "<|eot_id|>",
2103
  "pad_token_type_id": 0,
2104
  "padding_side": "right",
 
2105
  "stride": 0,
2106
  "tokenizer_class": "PreTrainedTokenizerFast",
2107
  "truncation_side": "right",