hans00 committed on
Commit
171d2d6
·
verified ·
1 Parent(s): ac9239e

Initial: q4 single-file fork (from ipsilondev) with config.json + tokenizer.json fixes

Browse files

- Forked from ipsilondev/chatterbox-multilingual-ONNX-q4 (q4-quantized, single-file ONNX, no .onnx_data sibling)
- Added missing config.json (synthesized from English model + vocab_size=2454; use_external_data_format=false)
- Patched tokenizer.json — added BOS/EOS/START_SPEECH/STOP_SPEECH/EXAGGERATION special tokens with content names matching the post_processor template (transformers.js's added_tokens_map[name] lookup needs the raw 'BOS'/'EOS'/etc names to resolve, otherwise post-processor positions silently become UNK)

These mirror the fixes applied earlier to BricksDisplay/chatterbox-multilingual-ONNX, but for the q4 single-file variant.

.gitattributes CHANGED
@@ -1,35 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.onnx filter=lfs diff=lfs merge=lfs -text
2
+ *.wav filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
4
  *.safetensors filter=lfs diff=lfs merge=lfs -text
5
+ Cangjie5_TC.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
Cangjie5_TC.json ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - ar
5
+ - da
6
+ - de
7
+ - el
8
+ - en
9
+ - es
10
+ - fi
11
+ - fr
12
+ - he
13
+ - hi
14
+ - it
15
+ - ja
16
+ - ko
17
+ - ms
18
+ - nl
19
+ - 'no'
20
+ - pl
21
+ - pt
22
+ - ru
23
+ - sv
24
+ - sw
25
+ - tr
26
+ - zh
27
+ pipeline_tag: text-to-speech
28
+ tags:
29
+ - text-to-speech
30
+ - speech
31
+ - speech-generation
32
+ - voice-cloning
33
+ - multilingual-tts
34
+ - onnx
35
+ - quantized
36
+ - q4
37
+ - transformers.js
38
+ library_name: transformers.js
39
+ base_model:
40
+ - onnx-community/chatterbox-multilingual-ONNX
41
+ ---
42
+
43
+ # Chatterbox Multilingual TTS - Q4 Quantized ONNX
44
+
45
+ Q4 weight-only quantized version of [onnx-community/chatterbox-multilingual-ONNX](https://huggingface.co/onnx-community/chatterbox-multilingual-ONNX) for use with **Transformers.js** and **ONNX Runtime Web**.
46
+
47
+ ## Key Features
48
+
49
+ - **75% smaller**: 790 MB vs 3.2 GB original
50
+ - **Single-file ONNX**: No external data files, compatible with Transformers.js
51
+ - **Near-original quality**: Minimal quality loss from Q4 quantization
52
+ - **23 languages supported**: ar, da, de, el, en, es, fi, fr, he, hi, it, ja, ko, ms, nl, no, pl, pt, ru, sv, sw, tr, zh
53
+
54
+ ## Model Sizes
55
+
56
+ | Model | Original (FP32) | Q4 Quantized |
57
+ |-------|-----------------|--------------|
58
+ | speech_encoder.onnx | 564 MB | 172 MB |
59
+ | embed_tokens.onnx | 66 MB | 65 MB |
60
+ | language_model.onnx | 2.0 GB | 338 MB |
61
+ | conditional_decoder.onnx | 510 MB | 215 MB |
62
+ | **Total** | **3.2 GB** | **790 MB** |
63
+
64
+ ## Usage
65
+
66
+ ### With ONNX Runtime (Python)
67
+
68
+ ```python
69
+ import onnxruntime
70
+
71
+ # Load Q4 models - single files, no external data needed
72
+ speech_encoder = onnxruntime.InferenceSession("onnx/speech_encoder.onnx")
73
+ embed_tokens = onnxruntime.InferenceSession("onnx/embed_tokens.onnx")
74
+ language_model = onnxruntime.InferenceSession("onnx/language_model.onnx")
75
+ conditional_decoder = onnxruntime.InferenceSession("onnx/conditional_decoder.onnx")
76
+ ```
77
+
78
+ ### With Transformers.js (JavaScript)
79
+
80
+ ```javascript
81
+ // Models are single-file ONNX format, compatible with ONNX Runtime Web
82
+ import { AutoTokenizer } from '@huggingface/transformers';
83
+
84
+ const tokenizer = await AutoTokenizer.from_pretrained('ipsilondev/chatterbox-multilingual-ONNX-q4');
85
+ ```
86
+
87
+ ## Quantization Details
88
+
89
+ - **Method**: Q4 weight-only quantization using `MatMulNBitsQuantizer`
90
+ - **Block size**: 32
91
+ - **Symmetric**: Yes
92
+ - **Format**: Single-file ONNX (no external data) for web compatibility
93
+
94
+ ## Important Parameters
95
+
96
+ When using these models, ensure you use the correct parameters:
97
+
98
+ ```python
99
+ repetition_penalty = 1.2 # CRITICAL: Do NOT use 2.0 - causes infinite loops
100
+ temperature = 0.8
101
+ top_p = 0.95
102
+ min_p = 0.05
103
+ ```
104
+
105
+ ## Supported Languages
106
+
107
+ | Code | Language | Code | Language |
108
+ |------|----------|------|----------|
109
+ | ar | Arabic | ko | Korean |
110
+ | da | Danish | ms | Malay |
111
+ | de | German | nl | Dutch |
112
+ | el | Greek | no | Norwegian |
113
+ | en | English | pl | Polish |
114
+ | es | Spanish | pt | Portuguese |
115
+ | fi | Finnish | ru | Russian |
116
+ | fr | French | sv | Swedish |
117
+ | he | Hebrew | sw | Swahili |
118
+ | hi | Hindi | tr | Turkish |
119
+ | it | Italian | zh | Chinese |
120
+ | ja | Japanese | | |
121
+
122
+ ## Credits
123
+
124
+ - Original model: [onnx-community/chatterbox-multilingual-ONNX](https://huggingface.co/onnx-community/chatterbox-multilingual-ONNX)
125
+ - Base model: [ResembleAI/chatterbox](https://github.com/resemble-ai/chatterbox)
126
+ - Quantization by: [ipsilondev](https://huggingface.co/ipsilondev)
127
+
128
+ ## License
129
+
130
+ MIT License (same as original model)
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "chatterbox",
3
+ "architectures": [
4
+ "ChatterboxModel"
5
+ ],
6
+ "text_config": {
7
+ "architectures": [
8
+ "LlamaForCausalLM"
9
+ ],
10
+ "attention_bias": false,
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "eos_token_id": [
14
+ 2,
15
+ 6562
16
+ ],
17
+ "head_dim": 64,
18
+ "hidden_act": "silu",
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "max_position_embeddings": 131072,
23
+ "mlp_bias": false,
24
+ "model_type": "llama",
25
+ "num_attention_heads": 16,
26
+ "num_hidden_layers": 30,
27
+ "num_key_value_heads": 16,
28
+ "pretraining_tp": 1,
29
+ "rms_norm_eps": 1e-05,
30
+ "rope_scaling": {
31
+ "factor": 8.0,
32
+ "high_freq_factor": 4.0,
33
+ "low_freq_factor": 1.0,
34
+ "original_max_position_embeddings": 8192,
35
+ "rope_type": "llama3"
36
+ },
37
+ "rope_theta": 500000.0,
38
+ "tie_word_embeddings": false,
39
+ "torch_dtype": "float32",
40
+ "use_cache": true,
41
+ "vocab_size": 2454
42
+ },
43
+ "transformers.js_config": {
44
+ "use_external_data_format": false,
45
+ "kv_cache_dtype": {
46
+ "fp16": "float16",
47
+ "q4f16": "float16"
48
+ }
49
+ }
50
+ }
default_voice.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ebc531cdaba358a327099c1c4f0448026719957bcf4d8e9868767f227e02f4e
3
+ size 714320
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": [
4
+ 2,
5
+ 6562
6
+ ],
7
+ "repetition_penalty": 1.2
8
+ }
onnx/conditional_decoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7fae0f1c2a94f02e4a30e15242b9119be7b4b0428306e0dd9038e05328b85e
3
+ size 225572798
onnx/embed_tokens.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b213e0cf547c2de64146de96781ae8742a130fe54e1f577bc7e5ec6cfb39be06
3
+ size 68420479
onnx/language_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f8bde071758d8e869a5b78e16d78d17c55ec594d445781f29e1ebcf39f1865
3
+ size 353810438
onnx/speech_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d32d3d75572c76cb7cd9e0489932fc758e242780c85ecd16bf6a3c2f0744838
3
+ size 180077492
tokenizer.json ADDED
@@ -0,0 +1,4014 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[STOP]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 0,
17
+ "content": "EOS",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 1,
26
+ "special": true,
27
+ "content": "[UNK]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 2,
35
+ "special": true,
36
+ "content": "[SPACE]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": true
41
+ },
42
+ {
43
+ "id": 255,
44
+ "special": true,
45
+ "content": "[START]",
46
+ "single_word": false,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ },
51
+ {
52
+ "id": 255,
53
+ "content": "BOS",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 604,
62
+ "content": "[UH]",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 605,
71
+ "content": "[UM]",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 606,
80
+ "content": "[giggle]",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 607,
89
+ "content": "[laughter]",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 608,
98
+ "content": "[guffaw]",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 609,
107
+ "content": "[inhale]",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 610,
116
+ "content": "[exhale]",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 611,
125
+ "content": "[sigh]",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
+ },
132
+ {
133
+ "id": 612,
134
+ "content": "[cry]",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
+ },
141
+ {
142
+ "id": 613,
143
+ "content": "[bark]",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
+ },
150
+ {
151
+ "id": 614,
152
+ "content": "[howl]",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
+ },
159
+ {
160
+ "id": 615,
161
+ "content": "[meow]",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 616,
170
+ "content": "[singing]",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 617,
179
+ "content": "[music]",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
+ },
186
+ {
187
+ "id": 618,
188
+ "content": "[whistle]",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
+ },
195
+ {
196
+ "id": 619,
197
+ "content": "[humming]",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
+ },
204
+ {
205
+ "id": 620,
206
+ "content": "[gasp]",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 621,
215
+ "content": "[groan]",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 622,
224
+ "content": "[whisper]",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 623,
233
+ "content": "[mumble]",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 624,
242
+ "content": "[sniff]",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 625,
251
+ "content": "[sneeze]",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 626,
260
+ "content": "[cough]",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 627,
269
+ "content": "[snore]",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 628,
278
+ "content": "[chew]",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 629,
287
+ "content": "[sip]",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 630,
296
+ "content": "[clear_throat]",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 631,
305
+ "content": "[kiss]",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
+ },
312
+ {
313
+ "id": 632,
314
+ "content": "[shhh]",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
+ },
321
+ {
322
+ "id": 633,
323
+ "content": "[gibberish]",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
+ },
330
+ {
331
+ "id": 634,
332
+ "content": "[fr]",
333
+ "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
+ "normalized": false,
337
+ "special": false
338
+ },
339
+ {
340
+ "id": 635,
341
+ "content": "[es]",
342
+ "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
+ "normalized": false,
346
+ "special": false
347
+ },
348
+ {
349
+ "id": 636,
350
+ "content": "[de]",
351
+ "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
+ "normalized": false,
355
+ "special": false
356
+ },
357
+ {
358
+ "id": 637,
359
+ "content": "[it]",
360
+ "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
+ "normalized": false,
364
+ "special": false
365
+ },
366
+ {
367
+ "id": 638,
368
+ "content": "[ipa]",
369
+ "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
+ },
375
+ {
376
+ "id": 639,
377
+ "content": "[end_of_label]",
378
+ "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
+ },
384
+ {
385
+ "id": 695,
386
+ "content": "[PLACEHOLDER55]",
387
+ "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
+ },
393
+ {
394
+ "id": 696,
395
+ "content": "[PLACEHOLDER56]",
396
+ "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
+ },
402
+ {
403
+ "id": 697,
404
+ "content": "[PLACEHOLDER57]",
405
+ "single_word": false,
406
+ "lstrip": false,
407
+ "rstrip": false,
408
+ "normalized": false,
409
+ "special": true
410
+ },
411
+ {
412
+ "id": 698,
413
+ "content": "[PLACEHOLDER58]",
414
+ "single_word": false,
415
+ "lstrip": false,
416
+ "rstrip": false,
417
+ "normalized": false,
418
+ "special": true
419
+ },
420
+ {
421
+ "id": 699,
422
+ "content": "[PLACEHOLDER59]",
423
+ "single_word": false,
424
+ "lstrip": false,
425
+ "rstrip": false,
426
+ "normalized": false,
427
+ "special": true
428
+ },
429
+ {
430
+ "id": 700,
431
+ "content": "[PLACEHOLDER60]",
432
+ "single_word": false,
433
+ "lstrip": false,
434
+ "rstrip": false,
435
+ "normalized": false,
436
+ "special": true
437
+ },
438
+ {
439
+ "id": 701,
440
+ "content": "[PLACEHOLDER61]",
441
+ "single_word": false,
442
+ "lstrip": false,
443
+ "rstrip": false,
444
+ "normalized": false,
445
+ "special": true
446
+ },
447
+ {
448
+ "id": 702,
449
+ "content": "[PLACEHOLDER62]",
450
+ "single_word": false,
451
+ "lstrip": false,
452
+ "rstrip": false,
453
+ "normalized": false,
454
+ "special": true
455
+ },
456
+ {
457
+ "id": 703,
458
+ "content": "[PLACEHOLDER63]",
459
+ "single_word": false,
460
+ "lstrip": false,
461
+ "rstrip": false,
462
+ "normalized": false,
463
+ "special": true
464
+ },
465
+ {
466
+ "id": 704,
467
+ "content": "[MASK]",
468
+ "single_word": false,
469
+ "lstrip": false,
470
+ "rstrip": false,
471
+ "normalized": false,
472
+ "special": true
473
+ },
474
+ {
475
+ "id": 705,
476
+ "content": "[PAD]",
477
+ "single_word": false,
478
+ "lstrip": false,
479
+ "rstrip": false,
480
+ "normalized": false,
481
+ "special": true
482
+ },
483
+ {
484
+ "id": 706,
485
+ "content": "[SEP]",
486
+ "single_word": false,
487
+ "lstrip": false,
488
+ "rstrip": false,
489
+ "normalized": false,
490
+ "special": true
491
+ },
492
+ {
493
+ "id": 707,
494
+ "content": "[CLS]",
495
+ "single_word": false,
496
+ "lstrip": false,
497
+ "rstrip": false,
498
+ "normalized": false,
499
+ "special": true
500
+ },
501
+ {
502
+ "id": 708,
503
+ "content": "[en]",
504
+ "single_word": false,
505
+ "lstrip": false,
506
+ "rstrip": false,
507
+ "normalized": false,
508
+ "special": false
509
+ },
510
+ {
511
+ "id": 709,
512
+ "content": "[nl]",
513
+ "single_word": false,
514
+ "lstrip": false,
515
+ "rstrip": false,
516
+ "normalized": false,
517
+ "special": false
518
+ },
519
+ {
520
+ "id": 710,
521
+ "content": "[pt]",
522
+ "single_word": false,
523
+ "lstrip": false,
524
+ "rstrip": false,
525
+ "normalized": false,
526
+ "special": false
527
+ },
528
+ {
529
+ "id": 711,
530
+ "content": "[el]",
531
+ "single_word": false,
532
+ "lstrip": false,
533
+ "rstrip": false,
534
+ "normalized": false,
535
+ "special": false
536
+ },
537
+ {
538
+ "id": 712,
539
+ "content": "[tr]",
540
+ "single_word": false,
541
+ "lstrip": false,
542
+ "rstrip": false,
543
+ "normalized": false,
544
+ "special": false
545
+ },
546
+ {
547
+ "id": 713,
548
+ "content": "[sv]",
549
+ "single_word": false,
550
+ "lstrip": false,
551
+ "rstrip": false,
552
+ "normalized": false,
553
+ "special": false
554
+ },
555
+ {
556
+ "id": 714,
557
+ "content": "[no]",
558
+ "single_word": false,
559
+ "lstrip": false,
560
+ "rstrip": false,
561
+ "normalized": false,
562
+ "special": false
563
+ },
564
+ {
565
+ "id": 715,
566
+ "content": "[da]",
567
+ "single_word": false,
568
+ "lstrip": false,
569
+ "rstrip": false,
570
+ "normalized": false,
571
+ "special": false
572
+ },
573
+ {
574
+ "id": 716,
575
+ "content": "[ru]",
576
+ "single_word": false,
577
+ "lstrip": false,
578
+ "rstrip": false,
579
+ "normalized": false,
580
+ "special": false
581
+ },
582
+ {
583
+ "id": 717,
584
+ "content": "[pl]",
585
+ "single_word": false,
586
+ "lstrip": false,
587
+ "rstrip": false,
588
+ "normalized": false,
589
+ "special": false
590
+ },
591
+ {
592
+ "id": 718,
593
+ "content": "[sk]",
594
+ "single_word": false,
595
+ "lstrip": false,
596
+ "rstrip": false,
597
+ "normalized": false,
598
+ "special": false
599
+ },
600
+ {
601
+ "id": 719,
602
+ "content": "[cs]",
603
+ "single_word": false,
604
+ "lstrip": false,
605
+ "rstrip": false,
606
+ "normalized": false,
607
+ "special": false
608
+ },
609
+ {
610
+ "id": 720,
611
+ "content": "[hu]",
612
+ "single_word": false,
613
+ "lstrip": false,
614
+ "rstrip": false,
615
+ "normalized": false,
616
+ "special": false
617
+ },
618
+ {
619
+ "id": 721,
620
+ "content": "[ar]",
621
+ "single_word": false,
622
+ "lstrip": false,
623
+ "rstrip": false,
624
+ "normalized": false,
625
+ "special": false
626
+ },
627
+ {
628
+ "id": 722,
629
+ "content": "[hi]",
630
+ "single_word": false,
631
+ "lstrip": false,
632
+ "rstrip": false,
633
+ "normalized": false,
634
+ "special": false
635
+ },
636
+ {
637
+ "id": 723,
638
+ "content": "[ja]",
639
+ "single_word": false,
640
+ "lstrip": false,
641
+ "rstrip": false,
642
+ "normalized": false,
643
+ "special": false
644
+ },
645
+ {
646
+ "id": 724,
647
+ "content": "[ko]",
648
+ "single_word": false,
649
+ "lstrip": false,
650
+ "rstrip": false,
651
+ "normalized": false,
652
+ "special": false
653
+ },
654
+ {
655
+ "id": 725,
656
+ "content": "[zh]",
657
+ "single_word": false,
658
+ "lstrip": false,
659
+ "rstrip": false,
660
+ "normalized": false,
661
+ "special": false
662
+ },
663
+ {
664
+ "id": 726,
665
+ "content": "[ro]",
666
+ "single_word": false,
667
+ "lstrip": false,
668
+ "rstrip": false,
669
+ "normalized": false,
670
+ "special": false
671
+ },
672
+ {
673
+ "id": 727,
674
+ "content": "[bg]",
675
+ "single_word": false,
676
+ "lstrip": false,
677
+ "rstrip": false,
678
+ "normalized": false,
679
+ "special": false
680
+ },
681
+ {
682
+ "id": 729,
683
+ "content": "[ea]",
684
+ "single_word": false,
685
+ "lstrip": false,
686
+ "rstrip": false,
687
+ "normalized": false,
688
+ "special": false
689
+ },
690
+ {
691
+ "id": 730,
692
+ "content": "[sw]",
693
+ "single_word": false,
694
+ "lstrip": false,
695
+ "rstrip": false,
696
+ "normalized": false,
697
+ "special": false
698
+ },
699
+ {
700
+ "id": 732,
701
+ "content": "[vi]",
702
+ "single_word": false,
703
+ "lstrip": false,
704
+ "rstrip": false,
705
+ "normalized": false,
706
+ "special": false
707
+ },
708
+ {
709
+ "id": 733,
710
+ "content": "[cj_a]",
711
+ "single_word": false,
712
+ "lstrip": false,
713
+ "rstrip": false,
714
+ "normalized": false,
715
+ "special": true
716
+ },
717
+ {
718
+ "id": 734,
719
+ "content": "[cj_b]",
720
+ "single_word": false,
721
+ "lstrip": false,
722
+ "rstrip": false,
723
+ "normalized": false,
724
+ "special": true
725
+ },
726
+ {
727
+ "id": 735,
728
+ "content": "[cj_c]",
729
+ "single_word": false,
730
+ "lstrip": false,
731
+ "rstrip": false,
732
+ "normalized": false,
733
+ "special": true
734
+ },
735
+ {
736
+ "id": 736,
737
+ "content": "[cj_d]",
738
+ "single_word": false,
739
+ "lstrip": false,
740
+ "rstrip": false,
741
+ "normalized": false,
742
+ "special": true
743
+ },
744
+ {
745
+ "id": 737,
746
+ "content": "[cj_e]",
747
+ "single_word": false,
748
+ "lstrip": false,
749
+ "rstrip": false,
750
+ "normalized": false,
751
+ "special": true
752
+ },
753
+ {
754
+ "id": 738,
755
+ "content": "[cj_f]",
756
+ "single_word": false,
757
+ "lstrip": false,
758
+ "rstrip": false,
759
+ "normalized": false,
760
+ "special": true
761
+ },
762
+ {
763
+ "id": 739,
764
+ "content": "[cj_g]",
765
+ "single_word": false,
766
+ "lstrip": false,
767
+ "rstrip": false,
768
+ "normalized": false,
769
+ "special": true
770
+ },
771
+ {
772
+ "id": 740,
773
+ "content": "[cj_h]",
774
+ "single_word": false,
775
+ "lstrip": false,
776
+ "rstrip": false,
777
+ "normalized": false,
778
+ "special": true
779
+ },
780
+ {
781
+ "id": 741,
782
+ "content": "[cj_i]",
783
+ "single_word": false,
784
+ "lstrip": false,
785
+ "rstrip": false,
786
+ "normalized": false,
787
+ "special": true
788
+ },
789
+ {
790
+ "id": 742,
791
+ "content": "[cj_j]",
792
+ "single_word": false,
793
+ "lstrip": false,
794
+ "rstrip": false,
795
+ "normalized": false,
796
+ "special": true
797
+ },
798
+ {
799
+ "id": 743,
800
+ "content": "[cj_k]",
801
+ "single_word": false,
802
+ "lstrip": false,
803
+ "rstrip": false,
804
+ "normalized": false,
805
+ "special": true
806
+ },
807
+ {
808
+ "id": 744,
809
+ "content": "[cj_l]",
810
+ "single_word": false,
811
+ "lstrip": false,
812
+ "rstrip": false,
813
+ "normalized": false,
814
+ "special": true
815
+ },
816
+ {
817
+ "id": 745,
818
+ "content": "[cj_m]",
819
+ "single_word": false,
820
+ "lstrip": false,
821
+ "rstrip": false,
822
+ "normalized": false,
823
+ "special": true
824
+ },
825
+ {
826
+ "id": 746,
827
+ "content": "[cj_n]",
828
+ "single_word": false,
829
+ "lstrip": false,
830
+ "rstrip": false,
831
+ "normalized": false,
832
+ "special": true
833
+ },
834
+ {
835
+ "id": 747,
836
+ "content": "[cj_o]",
837
+ "single_word": false,
838
+ "lstrip": false,
839
+ "rstrip": false,
840
+ "normalized": false,
841
+ "special": true
842
+ },
843
+ {
844
+ "id": 748,
845
+ "content": "[cj_p]",
846
+ "single_word": false,
847
+ "lstrip": false,
848
+ "rstrip": false,
849
+ "normalized": false,
850
+ "special": true
851
+ },
852
+ {
853
+ "id": 749,
854
+ "content": "[cj_q]",
855
+ "single_word": false,
856
+ "lstrip": false,
857
+ "rstrip": false,
858
+ "normalized": false,
859
+ "special": true
860
+ },
861
+ {
862
+ "id": 750,
863
+ "content": "[cj_r]",
864
+ "single_word": false,
865
+ "lstrip": false,
866
+ "rstrip": false,
867
+ "normalized": false,
868
+ "special": true
869
+ },
870
+ {
871
+ "id": 751,
872
+ "content": "[cj_s]",
873
+ "single_word": false,
874
+ "lstrip": false,
875
+ "rstrip": false,
876
+ "normalized": false,
877
+ "special": true
878
+ },
879
+ {
880
+ "id": 752,
881
+ "content": "[cj_t]",
882
+ "single_word": false,
883
+ "lstrip": false,
884
+ "rstrip": false,
885
+ "normalized": false,
886
+ "special": true
887
+ },
888
+ {
889
+ "id": 753,
890
+ "content": "[cj_u]",
891
+ "single_word": false,
892
+ "lstrip": false,
893
+ "rstrip": false,
894
+ "normalized": false,
895
+ "special": true
896
+ },
897
+ {
898
+ "id": 754,
899
+ "content": "[cj_v]",
900
+ "single_word": false,
901
+ "lstrip": false,
902
+ "rstrip": false,
903
+ "normalized": false,
904
+ "special": true
905
+ },
906
+ {
907
+ "id": 755,
908
+ "content": "[cj_w]",
909
+ "single_word": false,
910
+ "lstrip": false,
911
+ "rstrip": false,
912
+ "normalized": false,
913
+ "special": true
914
+ },
915
+ {
916
+ "id": 756,
917
+ "content": "[cj_x]",
918
+ "single_word": false,
919
+ "lstrip": false,
920
+ "rstrip": false,
921
+ "normalized": false,
922
+ "special": true
923
+ },
924
+ {
925
+ "id": 757,
926
+ "content": "[cj_y]",
927
+ "single_word": false,
928
+ "lstrip": false,
929
+ "rstrip": false,
930
+ "normalized": false,
931
+ "special": true
932
+ },
933
+ {
934
+ "id": 758,
935
+ "content": "[cj_z]",
936
+ "single_word": false,
937
+ "lstrip": false,
938
+ "rstrip": false,
939
+ "normalized": false,
940
+ "special": true
941
+ },
942
+ {
943
+ "id": 759,
944
+ "content": "[cj_0]",
945
+ "single_word": false,
946
+ "lstrip": false,
947
+ "rstrip": false,
948
+ "normalized": false,
949
+ "special": true
950
+ },
951
+ {
952
+ "id": 760,
953
+ "content": "[cj_1]",
954
+ "single_word": false,
955
+ "lstrip": false,
956
+ "rstrip": false,
957
+ "normalized": false,
958
+ "special": true
959
+ },
960
+ {
961
+ "id": 761,
962
+ "content": "[cj_2]",
963
+ "single_word": false,
964
+ "lstrip": false,
965
+ "rstrip": false,
966
+ "normalized": false,
967
+ "special": true
968
+ },
969
+ {
970
+ "id": 2052,
971
+ "content": "[cj_3]",
972
+ "single_word": false,
973
+ "lstrip": false,
974
+ "rstrip": false,
975
+ "normalized": false,
976
+ "special": true
977
+ },
978
+ {
979
+ "id": 2053,
980
+ "content": "[cj_4]",
981
+ "single_word": false,
982
+ "lstrip": false,
983
+ "rstrip": false,
984
+ "normalized": false,
985
+ "special": true
986
+ },
987
+ {
988
+ "id": 2059,
989
+ "content": "[cj_5]",
990
+ "single_word": false,
991
+ "lstrip": false,
992
+ "rstrip": false,
993
+ "normalized": false,
994
+ "special": true
995
+ },
996
+ {
997
+ "id": 2060,
998
+ "content": "[cj_6]",
999
+ "single_word": false,
1000
+ "lstrip": false,
1001
+ "rstrip": false,
1002
+ "normalized": false,
1003
+ "special": true
1004
+ },
1005
+ {
1006
+ "id": 2061,
1007
+ "content": "[cj_7]",
1008
+ "single_word": false,
1009
+ "lstrip": false,
1010
+ "rstrip": false,
1011
+ "normalized": false,
1012
+ "special": true
1013
+ },
1014
+ {
1015
+ "id": 2062,
1016
+ "content": "[cj_8]",
1017
+ "single_word": false,
1018
+ "lstrip": false,
1019
+ "rstrip": false,
1020
+ "normalized": false,
1021
+ "special": true
1022
+ },
1023
+ {
1024
+ "id": 2063,
1025
+ "content": "[cj_9]",
1026
+ "single_word": false,
1027
+ "lstrip": false,
1028
+ "rstrip": false,
1029
+ "normalized": false,
1030
+ "special": true
1031
+ },
1032
+ {
1033
+ "id": 2064,
1034
+ "content": "[cj_.]",
1035
+ "single_word": false,
1036
+ "lstrip": false,
1037
+ "rstrip": false,
1038
+ "normalized": false,
1039
+ "special": true
1040
+ },
1041
+ {
1042
+ "id": 2065,
1043
+ "content": "[PLACEHOLDER45]",
1044
+ "single_word": false,
1045
+ "lstrip": false,
1046
+ "rstrip": false,
1047
+ "normalized": false,
1048
+ "special": true
1049
+ },
1050
+ {
1051
+ "id": 2107,
1052
+ "content": "[fi]",
1053
+ "single_word": false,
1054
+ "lstrip": false,
1055
+ "rstrip": false,
1056
+ "normalized": false,
1057
+ "special": false
1058
+ },
1059
+ {
1060
+ "id": 2108,
1061
+ "content": "[ta]",
1062
+ "single_word": false,
1063
+ "lstrip": false,
1064
+ "rstrip": false,
1065
+ "normalized": false,
1066
+ "special": false
1067
+ },
1068
+ {
1069
+ "id": 2109,
1070
+ "content": "[ms]",
1071
+ "single_word": false,
1072
+ "lstrip": false,
1073
+ "rstrip": false,
1074
+ "normalized": false,
1075
+ "special": false
1076
+ },
1077
+ {
1078
+ "id": 2110,
1079
+ "content": "[he]",
1080
+ "single_word": false,
1081
+ "lstrip": false,
1082
+ "rstrip": false,
1083
+ "normalized": false,
1084
+ "special": true
1085
+ },
1086
+ {
1087
+ "id": 6561,
1088
+ "content": "START_SPEECH",
1089
+ "single_word": false,
1090
+ "lstrip": false,
1091
+ "rstrip": false,
1092
+ "normalized": false,
1093
+ "special": true
1094
+ },
1095
+ {
1096
+ "id": 6562,
1097
+ "content": "STOP_SPEECH",
1098
+ "single_word": false,
1099
+ "lstrip": false,
1100
+ "rstrip": false,
1101
+ "normalized": false,
1102
+ "special": true
1103
+ },
1104
+ {
1105
+ "id": 6563,
1106
+ "content": "EXAGGERATION",
1107
+ "single_word": false,
1108
+ "lstrip": false,
1109
+ "rstrip": false,
1110
+ "normalized": false,
1111
+ "special": true
1112
+ }
1113
+ ],
1114
+ "normalizer": {
1115
+ "type": "Sequence",
1116
+ "normalizers": [
1117
+ {
1118
+ "type": "Replace",
1119
+ "pattern": {
1120
+ "String": " "
1121
+ },
1122
+ "content": "[SPACE]"
1123
+ }
1124
+ ]
1125
+ },
1126
+ "pre_tokenizer": {
1127
+ "type": "Whitespace"
1128
+ },
1129
+ "post_processor": {
1130
+ "type": "TemplateProcessing",
1131
+ "single": [
1132
+ {
1133
+ "SpecialToken": {
1134
+ "id": "EXAGGERATION",
1135
+ "type_id": 0
1136
+ }
1137
+ },
1138
+ {
1139
+ "SpecialToken": {
1140
+ "id": "BOS",
1141
+ "type_id": 0
1142
+ }
1143
+ },
1144
+ {
1145
+ "Sequence": {
1146
+ "id": "A",
1147
+ "type_id": 0
1148
+ }
1149
+ },
1150
+ {
1151
+ "SpecialToken": {
1152
+ "id": "EOS",
1153
+ "type_id": 0
1154
+ }
1155
+ },
1156
+ {
1157
+ "SpecialToken": {
1158
+ "id": "START_SPEECH",
1159
+ "type_id": 0
1160
+ }
1161
+ },
1162
+ {
1163
+ "SpecialToken": {
1164
+ "id": "START_SPEECH",
1165
+ "type_id": 0
1166
+ }
1167
+ }
1168
+ ],
1169
+ "pair": [
1170
+ {
1171
+ "SpecialToken": {
1172
+ "id": "EXAGGERATION",
1173
+ "type_id": 0
1174
+ }
1175
+ },
1176
+ {
1177
+ "SpecialToken": {
1178
+ "id": "BOS",
1179
+ "type_id": 0
1180
+ }
1181
+ },
1182
+ {
1183
+ "Sequence": {
1184
+ "id": "A",
1185
+ "type_id": 0
1186
+ }
1187
+ },
1188
+ {
1189
+ "SpecialToken": {
1190
+ "id": "EOS",
1191
+ "type_id": 0
1192
+ }
1193
+ },
1194
+ {
1195
+ "SpecialToken": {
1196
+ "id": "START_SPEECH",
1197
+ "type_id": 0
1198
+ }
1199
+ },
1200
+ {
1201
+ "SpecialToken": {
1202
+ "id": "START_SPEECH",
1203
+ "type_id": 0
1204
+ }
1205
+ },
1206
+ {
1207
+ "SpecialToken": {
1208
+ "id": "EXAGGERATION",
1209
+ "type_id": 1
1210
+ }
1211
+ },
1212
+ {
1213
+ "SpecialToken": {
1214
+ "id": "BOS",
1215
+ "type_id": 1
1216
+ }
1217
+ },
1218
+ {
1219
+ "Sequence": {
1220
+ "id": "B",
1221
+ "type_id": 1
1222
+ }
1223
+ },
1224
+ {
1225
+ "SpecialToken": {
1226
+ "id": "EOS",
1227
+ "type_id": 1
1228
+ }
1229
+ },
1230
+ {
1231
+ "SpecialToken": {
1232
+ "id": "START_SPEECH",
1233
+ "type_id": 1
1234
+ }
1235
+ },
1236
+ {
1237
+ "SpecialToken": {
1238
+ "id": "START_SPEECH",
1239
+ "type_id": 1
1240
+ }
1241
+ }
1242
+ ],
1243
+ "special_tokens": {
1244
+ "BOS": {
1245
+ "id": "BOS",
1246
+ "ids": [
1247
+ 255
1248
+ ],
1249
+ "tokens": [
1250
+ "<s>"
1251
+ ]
1252
+ },
1253
+ "EOS": {
1254
+ "id": "EOS",
1255
+ "ids": [
1256
+ 0
1257
+ ],
1258
+ "tokens": [
1259
+ "</s>"
1260
+ ]
1261
+ },
1262
+ "EXAGGERATION": {
1263
+ "id": "EXAGGERATION",
1264
+ "ids": [
1265
+ 6563
1266
+ ],
1267
+ "tokens": [
1268
+ "<EXAGGERATION>"
1269
+ ]
1270
+ },
1271
+ "START_SPEECH": {
1272
+ "id": "START_SPEECH",
1273
+ "ids": [
1274
+ 6561
1275
+ ],
1276
+ "tokens": [
1277
+ "<START_SPEECH>"
1278
+ ]
1279
+ }
1280
+ }
1281
+ },
1282
+ "decoder": null,
1283
+ "model": {
1284
+ "type": "BPE",
1285
+ "dropout": null,
1286
+ "unk_token": "[UNK]",
1287
+ "continuing_subword_prefix": null,
1288
+ "end_of_word_suffix": null,
1289
+ "fuse_unk": false,
1290
+ "vocab": {
1291
+ "[STOP]": 0,
1292
+ "[UNK]": 1,
1293
+ "[SPACE]": 2,
1294
+ "!": 3,
1295
+ "'": 4,
1296
+ "(": 5,
1297
+ ")": 6,
1298
+ ",": 7,
1299
+ "-": 8,
1300
+ ".": 9,
1301
+ "/": 10,
1302
+ ":": 11,
1303
+ ";": 12,
1304
+ "?": 13,
1305
+ "a": 14,
1306
+ "b": 15,
1307
+ "c": 16,
1308
+ "d": 17,
1309
+ "e": 18,
1310
+ "f": 19,
1311
+ "g": 20,
1312
+ "h": 21,
1313
+ "i": 22,
1314
+ "j": 23,
1315
+ "k": 24,
1316
+ "l": 25,
1317
+ "m": 26,
1318
+ "n": 27,
1319
+ "o": 28,
1320
+ "p": 29,
1321
+ "q": 30,
1322
+ "r": 31,
1323
+ "s": 32,
1324
+ "t": 33,
1325
+ "u": 34,
1326
+ "v": 35,
1327
+ "w": 36,
1328
+ "x": 37,
1329
+ "y": 38,
1330
+ "z": 39,
1331
+ "th": 40,
1332
+ "in": 41,
1333
+ "the": 42,
1334
+ "an": 43,
1335
+ "er": 44,
1336
+ "ou": 45,
1337
+ "re": 46,
1338
+ "on": 47,
1339
+ "at": 48,
1340
+ "ed": 49,
1341
+ "en": 50,
1342
+ "to": 51,
1343
+ "ing": 52,
1344
+ "and": 53,
1345
+ "is": 54,
1346
+ "as": 55,
1347
+ "al": 56,
1348
+ "or": 57,
1349
+ "of": 58,
1350
+ "ar": 59,
1351
+ "it": 60,
1352
+ "es": 61,
1353
+ "he": 62,
1354
+ "st": 63,
1355
+ "le": 64,
1356
+ "om": 65,
1357
+ "se": 66,
1358
+ "be": 67,
1359
+ "ad": 68,
1360
+ "ow": 69,
1361
+ "ly": 70,
1362
+ "ch": 71,
1363
+ "wh": 72,
1364
+ "that": 73,
1365
+ "you": 74,
1366
+ "li": 75,
1367
+ "ve": 76,
1368
+ "ac": 77,
1369
+ "ti": 78,
1370
+ "ld": 79,
1371
+ "me": 80,
1372
+ "was": 81,
1373
+ "gh": 82,
1374
+ "id": 83,
1375
+ "ll": 84,
1376
+ "wi": 85,
1377
+ "ent": 86,
1378
+ "for": 87,
1379
+ "ay": 88,
1380
+ "ro": 89,
1381
+ "ver": 90,
1382
+ "ic": 91,
1383
+ "her": 92,
1384
+ "ke": 93,
1385
+ "his": 94,
1386
+ "no": 95,
1387
+ "ut": 96,
1388
+ "un": 97,
1389
+ "ir": 98,
1390
+ "lo": 99,
1391
+ "we": 100,
1392
+ "ri": 101,
1393
+ "ha": 102,
1394
+ "with": 103,
1395
+ "ght": 104,
1396
+ "out": 105,
1397
+ "im": 106,
1398
+ "ion": 107,
1399
+ "all": 108,
1400
+ "ab": 109,
1401
+ "one": 110,
1402
+ "ne": 111,
1403
+ "ge": 112,
1404
+ "ould": 113,
1405
+ "ter": 114,
1406
+ "mo": 115,
1407
+ "had": 116,
1408
+ "ce": 117,
1409
+ "she": 118,
1410
+ "go": 119,
1411
+ "sh": 120,
1412
+ "ur": 121,
1413
+ "am": 122,
1414
+ "so": 123,
1415
+ "pe": 124,
1416
+ "my": 125,
1417
+ "de": 126,
1418
+ "are": 127,
1419
+ "but": 128,
1420
+ "ome": 129,
1421
+ "fr": 130,
1422
+ "ther": 131,
1423
+ "fe": 132,
1424
+ "su": 133,
1425
+ "do": 134,
1426
+ "con": 135,
1427
+ "te": 136,
1428
+ "ain": 137,
1429
+ "ere": 138,
1430
+ "po": 139,
1431
+ "if": 140,
1432
+ "they": 141,
1433
+ "us": 142,
1434
+ "ag": 143,
1435
+ "tr": 144,
1436
+ "now": 145,
1437
+ "oun": 146,
1438
+ "this": 147,
1439
+ "have": 148,
1440
+ "not": 149,
1441
+ "sa": 150,
1442
+ "il": 151,
1443
+ "up": 152,
1444
+ "thing": 153,
1445
+ "from": 154,
1446
+ "ap": 155,
1447
+ "him": 156,
1448
+ "ack": 157,
1449
+ "ation": 158,
1450
+ "ant": 159,
1451
+ "our": 160,
1452
+ "op": 161,
1453
+ "like": 162,
1454
+ "ust": 163,
1455
+ "ess": 164,
1456
+ "bo": 165,
1457
+ "ok": 166,
1458
+ "ul": 167,
1459
+ "ind": 168,
1460
+ "ex": 169,
1461
+ "com": 170,
1462
+ "some": 171,
1463
+ "there": 172,
1464
+ "ers": 173,
1465
+ "co": 174,
1466
+ "res": 175,
1467
+ "man": 176,
1468
+ "ard": 177,
1469
+ "pl": 178,
1470
+ "wor": 179,
1471
+ "way": 180,
1472
+ "tion": 181,
1473
+ "fo": 182,
1474
+ "ca": 183,
1475
+ "were": 184,
1476
+ "by": 185,
1477
+ "ate": 186,
1478
+ "pro": 187,
1479
+ "ted": 188,
1480
+ "ound": 189,
1481
+ "own": 190,
1482
+ "would": 191,
1483
+ "ts": 192,
1484
+ "what": 193,
1485
+ "qu": 194,
1486
+ "ally": 195,
1487
+ "ight": 196,
1488
+ "ck": 197,
1489
+ "gr": 198,
1490
+ "when": 199,
1491
+ "ven": 200,
1492
+ "can": 201,
1493
+ "ough": 202,
1494
+ "ine": 203,
1495
+ "end": 204,
1496
+ "per": 205,
1497
+ "ous": 206,
1498
+ "od": 207,
1499
+ "ide": 208,
1500
+ "know": 209,
1501
+ "ty": 210,
1502
+ "very": 211,
1503
+ "si": 212,
1504
+ "ak": 213,
1505
+ "who": 214,
1506
+ "about": 215,
1507
+ "ill": 216,
1508
+ "them": 217,
1509
+ "est": 218,
1510
+ "red": 219,
1511
+ "ye": 220,
1512
+ "could": 221,
1513
+ "ong": 222,
1514
+ "your": 223,
1515
+ "their": 224,
1516
+ "em": 225,
1517
+ "just": 226,
1518
+ "other": 227,
1519
+ "into": 228,
1520
+ "any": 229,
1521
+ "whi": 230,
1522
+ "um": 231,
1523
+ "tw": 232,
1524
+ "ast": 233,
1525
+ "der": 234,
1526
+ "did": 235,
1527
+ "ie": 236,
1528
+ "been": 237,
1529
+ "ace": 238,
1530
+ "ink": 239,
1531
+ "ity": 240,
1532
+ "back": 241,
1533
+ "ting": 242,
1534
+ "br": 243,
1535
+ "more": 244,
1536
+ "ake": 245,
1537
+ "pp": 246,
1538
+ "then": 247,
1539
+ "sp": 248,
1540
+ "el": 249,
1541
+ "use": 250,
1542
+ "bl": 251,
1543
+ "said": 252,
1544
+ "over": 253,
1545
+ "get": 254,
1546
+ "[START]": 255,
1547
+ "\"": 256,
1548
+ "#": 257,
1549
+ "$": 258,
1550
+ "%": 259,
1551
+ "&": 260,
1552
+ "*": 261,
1553
+ "+": 262,
1554
+ "0": 263,
1555
+ "1": 264,
1556
+ "2": 265,
1557
+ "3": 266,
1558
+ "4": 267,
1559
+ "5": 268,
1560
+ "6": 269,
1561
+ "7": 270,
1562
+ "8": 271,
1563
+ "9": 272,
1564
+ "<": 273,
1565
+ "=": 274,
1566
+ ">": 275,
1567
+ "@": 276,
1568
+ "A": 277,
1569
+ "B": 278,
1570
+ "C": 279,
1571
+ "D": 280,
1572
+ "E": 281,
1573
+ "F": 282,
1574
+ "G": 283,
1575
+ "H": 284,
1576
+ "I": 285,
1577
+ "J": 286,
1578
+ "K": 287,
1579
+ "L": 288,
1580
+ "M": 289,
1581
+ "N": 290,
1582
+ "O": 291,
1583
+ "P": 292,
1584
+ "Q": 293,
1585
+ "R": 294,
1586
+ "S": 295,
1587
+ "T": 296,
1588
+ "U": 297,
1589
+ "V": 298,
1590
+ "W": 299,
1591
+ "X": 300,
1592
+ "Y": 301,
1593
+ "Z": 302,
1594
+ "[": 303,
1595
+ "\\": 304,
1596
+ "]": 305,
1597
+ "^": 306,
1598
+ "_": 307,
1599
+ "`": 308,
1600
+ "{": 309,
1601
+ "|": 310,
1602
+ "}": 311,
1603
+ "~": 312,
1604
+ "‐": 313,
1605
+ "‑": 314,
1606
+ "‒": 315,
1607
+ "–": 316,
1608
+ "—": 317,
1609
+ "―": 318,
1610
+ "‖": 319,
1611
+ "‗": 320,
1612
+ "‘": 321,
1613
+ "’": 322,
1614
+ "‚": 323,
1615
+ "‛": 324,
1616
+ "“": 325,
1617
+ "”": 326,
1618
+ "„": 327,
1619
+ "‟": 328,
1620
+ " ": 329,
1621
+ "¡": 330,
1622
+ "¢": 331,
1623
+ "£": 332,
1624
+ "¤": 333,
1625
+ "¥": 334,
1626
+ "¦": 335,
1627
+ "§": 336,
1628
+ "¨": 337,
1629
+ "©": 338,
1630
+ "ª": 339,
1631
+ "«": 340,
1632
+ "¬": 341,
1633
+ "­": 342,
1634
+ "®": 343,
1635
+ "¯": 344,
1636
+ "°": 345,
1637
+ "±": 346,
1638
+ "²": 347,
1639
+ "³": 348,
1640
+ "´": 349,
1641
+ "µ": 350,
1642
+ "¶": 351,
1643
+ "·": 352,
1644
+ "¸": 353,
1645
+ "¹": 354,
1646
+ "º": 355,
1647
+ "»": 356,
1648
+ "¼": 357,
1649
+ "½": 358,
1650
+ "¾": 359,
1651
+ "¿": 360,
1652
+ "À": 361,
1653
+ "Á": 362,
1654
+ "Â": 363,
1655
+ "Ã": 364,
1656
+ "Ä": 365,
1657
+ "Å": 366,
1658
+ "Æ": 367,
1659
+ "Ç": 368,
1660
+ "È": 369,
1661
+ "É": 370,
1662
+ "Ê": 371,
1663
+ "Ë": 372,
1664
+ "Ì": 373,
1665
+ "Í": 374,
1666
+ "Î": 375,
1667
+ "Ï": 376,
1668
+ "Ð": 377,
1669
+ "Ñ": 378,
1670
+ "Ò": 379,
1671
+ "Ó": 380,
1672
+ "Ô": 381,
1673
+ "Õ": 382,
1674
+ "Ö": 383,
1675
+ "×": 384,
1676
+ "Ø": 385,
1677
+ "Ù": 386,
1678
+ "Ú": 387,
1679
+ "Û": 388,
1680
+ "Ü": 389,
1681
+ "Ý": 390,
1682
+ "Þ": 391,
1683
+ "ß": 392,
1684
+ "à": 393,
1685
+ "á": 394,
1686
+ "â": 395,
1687
+ "ã": 396,
1688
+ "ä": 397,
1689
+ "å": 398,
1690
+ "æ": 399,
1691
+ "ç": 400,
1692
+ "è": 401,
1693
+ "é": 402,
1694
+ "ê": 403,
1695
+ "ë": 404,
1696
+ "ì": 405,
1697
+ "í": 406,
1698
+ "î": 407,
1699
+ "ï": 408,
1700
+ "ð": 409,
1701
+ "ñ": 410,
1702
+ "ò": 411,
1703
+ "ó": 412,
1704
+ "ô": 413,
1705
+ "õ": 414,
1706
+ "ö": 415,
1707
+ "÷": 416,
1708
+ "ø": 417,
1709
+ "ù": 418,
1710
+ "ú": 419,
1711
+ "û": 420,
1712
+ "ü": 421,
1713
+ "ý": 422,
1714
+ "þ": 423,
1715
+ "ÿ": 424,
1716
+ "ɐ": 425,
1717
+ "ɑ": 426,
1718
+ "ɒ": 427,
1719
+ "ɓ": 428,
1720
+ "ɔ": 429,
1721
+ "ɕ": 430,
1722
+ "ɖ": 431,
1723
+ "ɗ": 432,
1724
+ "ɘ": 433,
1725
+ "ə": 434,
1726
+ "ɚ": 435,
1727
+ "ɛ": 436,
1728
+ "ɜ": 437,
1729
+ "ɝ": 438,
1730
+ "ɞ": 439,
1731
+ "ɟ": 440,
1732
+ "ɠ": 441,
1733
+ "ɡ": 442,
1734
+ "ɢ": 443,
1735
+ "ɣ": 444,
1736
+ "ɤ": 445,
1737
+ "ɥ": 446,
1738
+ "ɦ": 447,
1739
+ "ɧ": 448,
1740
+ "ɨ": 449,
1741
+ "ɩ": 450,
1742
+ "ɪ": 451,
1743
+ "ɫ": 452,
1744
+ "ɬ": 453,
1745
+ "ɭ": 454,
1746
+ "ɮ": 455,
1747
+ "ɯ": 456,
1748
+ "ɰ": 457,
1749
+ "ɱ": 458,
1750
+ "ɲ": 459,
1751
+ "ɳ": 460,
1752
+ "ɴ": 461,
1753
+ "ɵ": 462,
1754
+ "ɶ": 463,
1755
+ "ɷ": 464,
1756
+ "ɸ": 465,
1757
+ "ɹ": 466,
1758
+ "ɺ": 467,
1759
+ "ɻ": 468,
1760
+ "ɼ": 469,
1761
+ "ɽ": 470,
1762
+ "ɾ": 471,
1763
+ "ɿ": 472,
1764
+ "ʀ": 473,
1765
+ "ʁ": 474,
1766
+ "ʂ": 475,
1767
+ "ʃ": 476,
1768
+ "ʄ": 477,
1769
+ "ʅ": 478,
1770
+ "ʆ": 479,
1771
+ "ʇ": 480,
1772
+ "ʈ": 481,
1773
+ "ʉ": 482,
1774
+ "ʊ": 483,
1775
+ "ʋ": 484,
1776
+ "ʌ": 485,
1777
+ "ʍ": 486,
1778
+ "ʎ": 487,
1779
+ "ʏ": 488,
1780
+ "ʐ": 489,
1781
+ "ʑ": 490,
1782
+ "ʒ": 491,
1783
+ "ʓ": 492,
1784
+ "ʔ": 493,
1785
+ "ʕ": 494,
1786
+ "ʖ": 495,
1787
+ "ʗ": 496,
1788
+ "ʘ": 497,
1789
+ "ʙ": 498,
1790
+ "ʚ": 499,
1791
+ "ʛ": 500,
1792
+ "ʜ": 501,
1793
+ "ʝ": 502,
1794
+ "ʞ": 503,
1795
+ "ʟ": 504,
1796
+ "ʠ": 505,
1797
+ "ʡ": 506,
1798
+ "ʢ": 507,
1799
+ "ʣ": 508,
1800
+ "ʤ": 509,
1801
+ "ʥ": 510,
1802
+ "ʦ": 511,
1803
+ "ʧ": 512,
1804
+ "ʨ": 513,
1805
+ "ʩ": 514,
1806
+ "ʪ": 515,
1807
+ "ʫ": 516,
1808
+ "ʬ": 517,
1809
+ "ʭ": 518,
1810
+ "ʮ": 519,
1811
+ "ʯ": 520,
1812
+ "ʰ": 521,
1813
+ "ʱ": 522,
1814
+ "ʲ": 523,
1815
+ "ʳ": 524,
1816
+ "ʴ": 525,
1817
+ "ʵ": 526,
1818
+ "ʶ": 527,
1819
+ "ʷ": 528,
1820
+ "ʸ": 529,
1821
+ "ʹ": 530,
1822
+ "ʺ": 531,
1823
+ "ʻ": 532,
1824
+ "ʼ": 533,
1825
+ "ʽ": 534,
1826
+ "ʾ": 535,
1827
+ "ʿ": 536,
1828
+ "ˀ": 537,
1829
+ "ˁ": 538,
1830
+ "˂": 539,
1831
+ "˃": 540,
1832
+ "˄": 541,
1833
+ "˅": 542,
1834
+ "ˆ": 543,
1835
+ "ˇ": 544,
1836
+ "ˈ": 545,
1837
+ "ˉ": 546,
1838
+ "ˊ": 547,
1839
+ "ˋ": 548,
1840
+ "ˌ": 549,
1841
+ "ˍ": 550,
1842
+ "ˎ": 551,
1843
+ "ˏ": 552,
1844
+ "ː": 553,
1845
+ "ˑ": 554,
1846
+ "˒": 555,
1847
+ "˓": 556,
1848
+ "˔": 557,
1849
+ "˕": 558,
1850
+ "˖": 559,
1851
+ "˗": 560,
1852
+ "˘": 561,
1853
+ "˙": 562,
1854
+ "˚": 563,
1855
+ "˛": 564,
1856
+ "˜": 565,
1857
+ "˝": 566,
1858
+ "˞": 567,
1859
+ "˟": 568,
1860
+ "ˠ": 569,
1861
+ "ˡ": 570,
1862
+ "ˢ": 571,
1863
+ "ˣ": 572,
1864
+ "ˤ": 573,
1865
+ "˥": 574,
1866
+ "˦": 575,
1867
+ "˧": 576,
1868
+ "˨": 577,
1869
+ "˩": 578,
1870
+ "˪": 579,
1871
+ "˫": 580,
1872
+ "ˬ": 581,
1873
+ "˭": 582,
1874
+ "ˮ": 583,
1875
+ "˯": 584,
1876
+ "˰": 585,
1877
+ "˱": 586,
1878
+ "˲": 587,
1879
+ "˳": 588,
1880
+ "˴": 589,
1881
+ "˵": 590,
1882
+ "˶": 591,
1883
+ "˷": 592,
1884
+ "˸": 593,
1885
+ "˹": 594,
1886
+ "˺": 595,
1887
+ "˻": 596,
1888
+ "˼": 597,
1889
+ "˽": 598,
1890
+ "˾": 599,
1891
+ "˿": 600,
1892
+ "ā": 601,
1893
+ "ō": 602,
1894
+ "…": 603,
1895
+ "[UH]": 604,
1896
+ "[UM]": 605,
1897
+ "[giggle]": 606,
1898
+ "[laughter]": 607,
1899
+ "[guffaw]": 608,
1900
+ "[inhale]": 609,
1901
+ "[exhale]": 610,
1902
+ "[sigh]": 611,
1903
+ "[cry]": 612,
1904
+ "[bark]": 613,
1905
+ "[howl]": 614,
1906
+ "[meow]": 615,
1907
+ "[singing]": 616,
1908
+ "[music]": 617,
1909
+ "[whistle]": 618,
1910
+ "[humming]": 619,
1911
+ "[gasp]": 620,
1912
+ "[groan]": 621,
1913
+ "[whisper]": 622,
1914
+ "[mumble]": 623,
1915
+ "[sniff]": 624,
1916
+ "[sneeze]": 625,
1917
+ "[cough]": 626,
1918
+ "[snore]": 627,
1919
+ "[chew]": 628,
1920
+ "[sip]": 629,
1921
+ "[clear_throat]": 630,
1922
+ "[kiss]": 631,
1923
+ "[shhh]": 632,
1924
+ "[gibberish]": 633,
1925
+ "[fr]": 634,
1926
+ "[es]": 635,
1927
+ "[de]": 636,
1928
+ "[it]": 637,
1929
+ "[ipa]": 638,
1930
+ "[end_of_label]": 639,
1931
+ "ŋ": 640,
1932
+ "ᵻ": 641,
1933
+ "θ": 642,
1934
+ "̩": 643,
1935
+ "̃": 644,
1936
+ "ɑː": 645,
1937
+ "iː": 646,
1938
+ "uː": 647,
1939
+ "ɜː": 648,
1940
+ "ɔː": 649,
1941
+ "oː": 650,
1942
+ "eɪ": 651,
1943
+ "oʊ": 652,
1944
+ "aɪ": 653,
1945
+ "aʊ": 654,
1946
+ "ɔɪ": 655,
1947
+ "dʒ": 656,
1948
+ "tʃ": 657,
1949
+ "ɪŋ": 658,
1950
+ "ᵻd": 659,
1951
+ "ˈiː": 660,
1952
+ "ˌiː": 661,
1953
+ "ˈɪ": 662,
1954
+ "ˌɪ": 663,
1955
+ "ˈeɪ": 664,
1956
+ "ˌeɪ": 665,
1957
+ "ˈɛ": 666,
1958
+ "ˌɛ": 667,
1959
+ "ˈæ": 668,
1960
+ "ˌæ": 669,
1961
+ "ˈɑː": 670,
1962
+ "ˌɑː": 671,
1963
+ "ˈɔː": 672,
1964
+ "ˌɔː": 673,
1965
+ "oːɹ": 674,
1966
+ "ˈoːɹ": 675,
1967
+ "ˌoːɹ": 676,
1968
+ "ˈoʊ": 677,
1969
+ "ˌoʊ": 678,
1970
+ "ˈʊ": 679,
1971
+ "ˌʊ": 680,
1972
+ "ˈuː": 681,
1973
+ "ˌuː": 682,
1974
+ "ˈɜː": 683,
1975
+ "ˌɜː": 684,
1976
+ "ˈʌ": 685,
1977
+ "ˌʌ": 686,
1978
+ "ˈaɪ": 687,
1979
+ "ˌaɪ": 688,
1980
+ "ˈaʊ": 689,
1981
+ "ˌaʊ": 690,
1982
+ "ˈɔɪ": 691,
1983
+ "ˌɔɪ": 692,
1984
+ "ˈɚ": 693,
1985
+ "ˌɐ": 694,
1986
+ "[PLACEHOLDER55]": 695,
1987
+ "[PLACEHOLDER56]": 696,
1988
+ "[PLACEHOLDER57]": 697,
1989
+ "[PLACEHOLDER58]": 698,
1990
+ "[PLACEHOLDER59]": 699,
1991
+ "[PLACEHOLDER60]": 700,
1992
+ "[PLACEHOLDER61]": 701,
1993
+ "[PLACEHOLDER62]": 702,
1994
+ "[PLACEHOLDER63]": 703,
1995
+ "[MASK]": 704,
1996
+ "[PAD]": 705,
1997
+ "[SEP]": 706,
1998
+ "[CLS]": 707,
1999
+ "[en]": 708,
2000
+ "[nl]": 709,
2001
+ "[pt]": 710,
2002
+ "[el]": 711,
2003
+ "[tr]": 712,
2004
+ "[sv]": 713,
2005
+ "[no]": 714,
2006
+ "[da]": 715,
2007
+ "[ru]": 716,
2008
+ "[pl]": 717,
2009
+ "[sk]": 718,
2010
+ "[cs]": 719,
2011
+ "[hu]": 720,
2012
+ "[ar]": 721,
2013
+ "[hi]": 722,
2014
+ "[ja]": 723,
2015
+ "[ko]": 724,
2016
+ "[zh]": 725,
2017
+ "[ro]": 726,
2018
+ "[bg]": 727,
2019
+ " ": 728,
2020
+ "[ea]": 729,
2021
+ "[sw]": 730,
2022
+ "─": 731,
2023
+ "[vi]": 732,
2024
+ "[cj_a]": 733,
2025
+ "[cj_b]": 734,
2026
+ "[cj_c]": 735,
2027
+ "[cj_d]": 736,
2028
+ "[cj_e]": 737,
2029
+ "[cj_f]": 738,
2030
+ "[cj_g]": 739,
2031
+ "[cj_h]": 740,
2032
+ "[cj_i]": 741,
2033
+ "[cj_j]": 742,
2034
+ "[cj_k]": 743,
2035
+ "[cj_l]": 744,
2036
+ "[cj_m]": 745,
2037
+ "[cj_n]": 746,
2038
+ "[cj_o]": 747,
2039
+ "[cj_p]": 748,
2040
+ "[cj_q]": 749,
2041
+ "[cj_r]": 750,
2042
+ "[cj_s]": 751,
2043
+ "[cj_t]": 752,
2044
+ "[cj_u]": 753,
2045
+ "[cj_v]": 754,
2046
+ "[cj_w]": 755,
2047
+ "[cj_x]": 756,
2048
+ "[cj_y]": 757,
2049
+ "[cj_z]": 758,
2050
+ "[cj_0]": 759,
2051
+ "[cj_1]": 760,
2052
+ "[cj_2]": 761,
2053
+ "̈": 762,
2054
+ "̄": 763,
2055
+ "́": 764,
2056
+ "μ": 765,
2057
+ "̧": 766,
2058
+ "⁄": 767,
2059
+ "̀": 768,
2060
+ "̂": 769,
2061
+ "̊": 770,
2062
+ "̆": 771,
2063
+ "̨": 772,
2064
+ "̇": 773,
2065
+ "̌": 774,
2066
+ "Đ": 775,
2067
+ "đ": 776,
2068
+ "Ħ": 777,
2069
+ "ħ": 778,
2070
+ "ı": 779,
2071
+ "ĸ": 780,
2072
+ "Ł": 781,
2073
+ "ł": 782,
2074
+ "Ŋ": 783,
2075
+ "̋": 784,
2076
+ "Œ": 785,
2077
+ "œ": 786,
2078
+ "Ŧ": 787,
2079
+ "ŧ": 788,
2080
+ "ƀ": 789,
2081
+ "Ɓ": 790,
2082
+ "Ƃ": 791,
2083
+ "ƃ": 792,
2084
+ "Ƅ": 793,
2085
+ "ƅ": 794,
2086
+ "Ɔ": 795,
2087
+ "Ƈ": 796,
2088
+ "ƈ": 797,
2089
+ "Ɖ": 798,
2090
+ "Ɗ": 799,
2091
+ "Ƌ": 800,
2092
+ "ƌ": 801,
2093
+ "ƍ": 802,
2094
+ "Ǝ": 803,
2095
+ "Ə": 804,
2096
+ "Ɛ": 805,
2097
+ "Ƒ": 806,
2098
+ "ƒ": 807,
2099
+ "Ɠ": 808,
2100
+ "Ɣ": 809,
2101
+ "ƕ": 810,
2102
+ "Ɩ": 811,
2103
+ "Ɨ": 812,
2104
+ "Ƙ": 813,
2105
+ "ƙ": 814,
2106
+ "ƚ": 815,
2107
+ "ƛ": 816,
2108
+ "Ɯ": 817,
2109
+ "Ɲ": 818,
2110
+ "ƞ": 819,
2111
+ "Ɵ": 820,
2112
+ "̛": 821,
2113
+ "Ƣ": 822,
2114
+ "ƣ": 823,
2115
+ "Ƥ": 824,
2116
+ "ƥ": 825,
2117
+ "Ʀ": 826,
2118
+ "Ƨ": 827,
2119
+ "ƨ": 828,
2120
+ "Ʃ": 829,
2121
+ "ƪ": 830,
2122
+ "ƫ": 831,
2123
+ "Ƭ": 832,
2124
+ "ƭ": 833,
2125
+ "Ʈ": 834,
2126
+ "Ʊ": 835,
2127
+ "Ʋ": 836,
2128
+ "Ƴ": 837,
2129
+ "ƴ": 838,
2130
+ "Ƶ": 839,
2131
+ "ƶ": 840,
2132
+ "Ʒ": 841,
2133
+ "Ƹ": 842,
2134
+ "ƹ": 843,
2135
+ "ƺ": 844,
2136
+ "ƻ": 845,
2137
+ "Ƽ": 846,
2138
+ "ƽ": 847,
2139
+ "ƾ": 848,
2140
+ "ƿ": 849,
2141
+ "ǀ": 850,
2142
+ "ǁ": 851,
2143
+ "ǂ": 852,
2144
+ "ǃ": 853,
2145
+ "ǝ": 854,
2146
+ "Ǥ": 855,
2147
+ "ǥ": 856,
2148
+ "Ƕ": 857,
2149
+ "Ƿ": 858,
2150
+ "̏": 859,
2151
+ "̑": 860,
2152
+ "̦": 861,
2153
+ "Ȝ": 862,
2154
+ "ȝ": 863,
2155
+ "Ƞ": 864,
2156
+ "ȡ": 865,
2157
+ "Ȣ": 866,
2158
+ "ȣ": 867,
2159
+ "Ȥ": 868,
2160
+ "ȥ": 869,
2161
+ "ȴ": 870,
2162
+ "ȵ": 871,
2163
+ "ȶ": 872,
2164
+ "ȷ": 873,
2165
+ "ȸ": 874,
2166
+ "ȹ": 875,
2167
+ "Ⱥ": 876,
2168
+ "Ȼ": 877,
2169
+ "ȼ": 878,
2170
+ "Ƚ": 879,
2171
+ "Ⱦ": 880,
2172
+ "ȿ": 881,
2173
+ "ɀ": 882,
2174
+ "Ɂ": 883,
2175
+ "ɂ": 884,
2176
+ "Ƀ": 885,
2177
+ "Ʉ": 886,
2178
+ "Ʌ": 887,
2179
+ "Ɇ": 888,
2180
+ "ɇ": 889,
2181
+ "Ɉ": 890,
2182
+ "ɉ": 891,
2183
+ "Ɋ": 892,
2184
+ "ɋ": 893,
2185
+ "Ɍ": 894,
2186
+ "ɍ": 895,
2187
+ "Ɏ": 896,
2188
+ "ɏ": 897,
2189
+ "̅": 898,
2190
+ "̉": 899,
2191
+ "̍": 900,
2192
+ "̎": 901,
2193
+ "̐": 902,
2194
+ "̒": 903,
2195
+ "̓": 904,
2196
+ "̔": 905,
2197
+ "̕": 906,
2198
+ "̖": 907,
2199
+ "̗": 908,
2200
+ "̘": 909,
2201
+ "̙": 910,
2202
+ "̚": 911,
2203
+ "̜": 912,
2204
+ "̝": 913,
2205
+ "̞": 914,
2206
+ "̟": 915,
2207
+ "̠": 916,
2208
+ "̡": 917,
2209
+ "̢": 918,
2210
+ "̣": 919,
2211
+ "̤": 920,
2212
+ "̥": 921,
2213
+ "̪": 922,
2214
+ "̫": 923,
2215
+ "̬": 924,
2216
+ "̭": 925,
2217
+ "̮": 926,
2218
+ "̯": 927,
2219
+ "̰": 928,
2220
+ "̱": 929,
2221
+ "̲": 930,
2222
+ "̳": 931,
2223
+ "̴": 932,
2224
+ "̵": 933,
2225
+ "̶": 934,
2226
+ "̷": 935,
2227
+ "̸": 936,
2228
+ "̹": 937,
2229
+ "̺": 938,
2230
+ "̻": 939,
2231
+ "̼": 940,
2232
+ "̽": 941,
2233
+ "̾": 942,
2234
+ "̿": 943,
2235
+ "͂": 944,
2236
+ "ͅ": 945,
2237
+ "͆": 946,
2238
+ "͇": 947,
2239
+ "͈": 948,
2240
+ "͉": 949,
2241
+ "͊": 950,
2242
+ "͋": 951,
2243
+ "͌": 952,
2244
+ "͍": 953,
2245
+ "͎": 954,
2246
+ "͏": 955,
2247
+ "͐": 956,
2248
+ "͑": 957,
2249
+ "͒": 958,
2250
+ "͓": 959,
2251
+ "͔": 960,
2252
+ "͕": 961,
2253
+ "͖": 962,
2254
+ "͗": 963,
2255
+ "͘": 964,
2256
+ "͙": 965,
2257
+ "͚": 966,
2258
+ "͛": 967,
2259
+ "͜": 968,
2260
+ "͝": 969,
2261
+ "͞": 970,
2262
+ "͟": 971,
2263
+ "͠": 972,
2264
+ "͡": 973,
2265
+ "͢": 974,
2266
+ "ͣ": 975,
2267
+ "ͤ": 976,
2268
+ "ͥ": 977,
2269
+ "ͦ": 978,
2270
+ "ͧ": 979,
2271
+ "ͨ": 980,
2272
+ "ͩ": 981,
2273
+ "ͪ": 982,
2274
+ "ͫ": 983,
2275
+ "ͬ": 984,
2276
+ "ͭ": 985,
2277
+ "ͮ": 986,
2278
+ "ͯ": 987,
2279
+ "Ͱ": 988,
2280
+ "ͱ": 989,
2281
+ "Ͳ": 990,
2282
+ "ͳ": 991,
2283
+ "͵": 992,
2284
+ "Ͷ": 993,
2285
+ "ͷ": 994,
2286
+ "ͻ": 995,
2287
+ "ͼ": 996,
2288
+ "ͽ": 997,
2289
+ "Ϳ": 998,
2290
+ "Α": 999,
2291
+ "Ε": 1000,
2292
+ "Η": 1001,
2293
+ "Ι": 1002,
2294
+ "Ο": 1003,
2295
+ "Υ": 1004,
2296
+ "Ω": 1005,
2297
+ "ι": 1006,
2298
+ "Β": 1007,
2299
+ "Γ": 1008,
2300
+ "Δ": 1009,
2301
+ "Ζ": 1010,
2302
+ "Θ": 1011,
2303
+ "Κ": 1012,
2304
+ "Λ": 1013,
2305
+ "Μ": 1014,
2306
+ "Ν": 1015,
2307
+ "Ξ": 1016,
2308
+ "Π": 1017,
2309
+ "Ρ": 1018,
2310
+ "Σ": 1019,
2311
+ "Τ": 1020,
2312
+ "Φ": 1021,
2313
+ "Χ": 1022,
2314
+ "Ψ": 1023,
2315
+ "α": 1024,
2316
+ "ε": 1025,
2317
+ "η": 1026,
2318
+ "υ": 1027,
2319
+ "β": 1028,
2320
+ "γ": 1029,
2321
+ "δ": 1030,
2322
+ "ζ": 1031,
2323
+ "κ": 1032,
2324
+ "λ": 1033,
2325
+ "ν": 1034,
2326
+ "ξ": 1035,
2327
+ "ο": 1036,
2328
+ "π": 1037,
2329
+ "ρ": 1038,
2330
+ "ς": 1039,
2331
+ "σ": 1040,
2332
+ "τ": 1041,
2333
+ "φ": 1042,
2334
+ "χ": 1043,
2335
+ "ψ": 1044,
2336
+ "ω": 1045,
2337
+ "Ϗ": 1046,
2338
+ "ϗ": 1047,
2339
+ "Ϙ": 1048,
2340
+ "ϙ": 1049,
2341
+ "Ϛ": 1050,
2342
+ "ϛ": 1051,
2343
+ "Ϝ": 1052,
2344
+ "ϝ": 1053,
2345
+ "Ϟ": 1054,
2346
+ "ϟ": 1055,
2347
+ "Ϡ": 1056,
2348
+ "ϡ": 1057,
2349
+ "Ϣ": 1058,
2350
+ "ϣ": 1059,
2351
+ "Ϥ": 1060,
2352
+ "ϥ": 1061,
2353
+ "Ϧ": 1062,
2354
+ "ϧ": 1063,
2355
+ "Ϩ": 1064,
2356
+ "ϩ": 1065,
2357
+ "Ϫ": 1066,
2358
+ "ϫ": 1067,
2359
+ "Ϭ": 1068,
2360
+ "ϭ": 1069,
2361
+ "Ϯ": 1070,
2362
+ "ϯ": 1071,
2363
+ "ϳ": 1072,
2364
+ "϶": 1073,
2365
+ "Ϸ": 1074,
2366
+ "ϸ": 1075,
2367
+ "Ϻ": 1076,
2368
+ "ϻ": 1077,
2369
+ "ϼ": 1078,
2370
+ "Ͻ": 1079,
2371
+ "Ͼ": 1080,
2372
+ "Ͽ": 1081,
2373
+ "Е": 1082,
2374
+ "Ђ": 1083,
2375
+ "Г": 1084,
2376
+ "Є": 1085,
2377
+ "Ѕ": 1086,
2378
+ "І": 1087,
2379
+ "Ј": 1088,
2380
+ "Љ": 1089,
2381
+ "Њ": 1090,
2382
+ "Ћ": 1091,
2383
+ "К": 1092,
2384
+ "И": 1093,
2385
+ "У": 1094,
2386
+ "Џ": 1095,
2387
+ "А": 1096,
2388
+ "Б": 1097,
2389
+ "В": 1098,
2390
+ "Д": 1099,
2391
+ "Ж": 1100,
2392
+ "З": 1101,
2393
+ "Л": 1102,
2394
+ "М": 1103,
2395
+ "Н": 1104,
2396
+ "О": 1105,
2397
+ "П": 1106,
2398
+ "Р": 1107,
2399
+ "С": 1108,
2400
+ "Т": 1109,
2401
+ "Ф": 1110,
2402
+ "Х": 1111,
2403
+ "Ц": 1112,
2404
+ "Ч": 1113,
2405
+ "Ш": 1114,
2406
+ "Щ": 1115,
2407
+ "Ъ": 1116,
2408
+ "Ы": 1117,
2409
+ "Ь": 1118,
2410
+ "Э": 1119,
2411
+ "Ю": 1120,
2412
+ "Я": 1121,
2413
+ "а": 1122,
2414
+ "б": 1123,
2415
+ "в": 1124,
2416
+ "г": 1125,
2417
+ "д": 1126,
2418
+ "е": 1127,
2419
+ "ж": 1128,
2420
+ "з": 1129,
2421
+ "и": 1130,
2422
+ "к": 1131,
2423
+ "л": 1132,
2424
+ "м": 1133,
2425
+ "н": 1134,
2426
+ "о": 1135,
2427
+ "п": 1136,
2428
+ "р": 1137,
2429
+ "с": 1138,
2430
+ "т": 1139,
2431
+ "у": 1140,
2432
+ "ф": 1141,
2433
+ "х": 1142,
2434
+ "ц": 1143,
2435
+ "ч": 1144,
2436
+ "ш": 1145,
2437
+ "щ": 1146,
2438
+ "ъ": 1147,
2439
+ "ы": 1148,
2440
+ "ь": 1149,
2441
+ "э": 1150,
2442
+ "ю": 1151,
2443
+ "я": 1152,
2444
+ "ђ": 1153,
2445
+ "є": 1154,
2446
+ "ѕ": 1155,
2447
+ "і": 1156,
2448
+ "ј": 1157,
2449
+ "љ": 1158,
2450
+ "њ": 1159,
2451
+ "ћ": 1160,
2452
+ "џ": 1161,
2453
+ "Ѡ": 1162,
2454
+ "ѡ": 1163,
2455
+ "Ѣ": 1164,
2456
+ "ѣ": 1165,
2457
+ "Ѥ": 1166,
2458
+ "ѥ": 1167,
2459
+ "Ѧ": 1168,
2460
+ "ѧ": 1169,
2461
+ "Ѩ": 1170,
2462
+ "ѩ": 1171,
2463
+ "Ѫ": 1172,
2464
+ "ѫ": 1173,
2465
+ "Ѭ": 1174,
2466
+ "ѭ": 1175,
2467
+ "Ѯ": 1176,
2468
+ "ѯ": 1177,
2469
+ "Ѱ": 1178,
2470
+ "ѱ": 1179,
2471
+ "Ѳ": 1180,
2472
+ "ѳ": 1181,
2473
+ "Ѵ": 1182,
2474
+ "ѵ": 1183,
2475
+ "Ѹ": 1184,
2476
+ "ѹ": 1185,
2477
+ "Ѻ": 1186,
2478
+ "ѻ": 1187,
2479
+ "Ѽ": 1188,
2480
+ "ѽ": 1189,
2481
+ "Ѿ": 1190,
2482
+ "ѿ": 1191,
2483
+ "Ҁ": 1192,
2484
+ "ҁ": 1193,
2485
+ "҂": 1194,
2486
+ "҃": 1195,
2487
+ "҄": 1196,
2488
+ "҅": 1197,
2489
+ "҆": 1198,
2490
+ "҇": 1199,
2491
+ "҈": 1200,
2492
+ "҉": 1201,
2493
+ "Ҋ": 1202,
2494
+ "ҋ": 1203,
2495
+ "Ҍ": 1204,
2496
+ "ҍ": 1205,
2497
+ "Ҏ": 1206,
2498
+ "ҏ": 1207,
2499
+ "Ґ": 1208,
2500
+ "ґ": 1209,
2501
+ "Ғ": 1210,
2502
+ "ғ": 1211,
2503
+ "Ҕ": 1212,
2504
+ "ҕ": 1213,
2505
+ "Җ": 1214,
2506
+ "җ": 1215,
2507
+ "Ҙ": 1216,
2508
+ "ҙ": 1217,
2509
+ "Қ": 1218,
2510
+ "қ": 1219,
2511
+ "Ҝ": 1220,
2512
+ "ҝ": 1221,
2513
+ "Ҟ": 1222,
2514
+ "ҟ": 1223,
2515
+ "Ҡ": 1224,
2516
+ "ҡ": 1225,
2517
+ "Ң": 1226,
2518
+ "ң": 1227,
2519
+ "Ҥ": 1228,
2520
+ "ҥ": 1229,
2521
+ "Ҧ": 1230,
2522
+ "ҧ": 1231,
2523
+ "Ҩ": 1232,
2524
+ "ҩ": 1233,
2525
+ "Ҫ": 1234,
2526
+ "ҫ": 1235,
2527
+ "Ҭ": 1236,
2528
+ "ҭ": 1237,
2529
+ "Ү": 1238,
2530
+ "ү": 1239,
2531
+ "Ұ": 1240,
2532
+ "ұ": 1241,
2533
+ "Ҳ": 1242,
2534
+ "ҳ": 1243,
2535
+ "Ҵ": 1244,
2536
+ "ҵ": 1245,
2537
+ "Ҷ": 1246,
2538
+ "ҷ": 1247,
2539
+ "Ҹ": 1248,
2540
+ "ҹ": 1249,
2541
+ "Һ": 1250,
2542
+ "һ": 1251,
2543
+ "Ҽ": 1252,
2544
+ "ҽ": 1253,
2545
+ "Ҿ": 1254,
2546
+ "ҿ": 1255,
2547
+ "Ӏ": 1256,
2548
+ "Ӄ": 1257,
2549
+ "ӄ": 1258,
2550
+ "Ӆ": 1259,
2551
+ "ӆ": 1260,
2552
+ "Ӈ": 1261,
2553
+ "ӈ": 1262,
2554
+ "Ӊ": 1263,
2555
+ "ӊ": 1264,
2556
+ "Ӌ": 1265,
2557
+ "ӌ": 1266,
2558
+ "Ӎ": 1267,
2559
+ "ӎ": 1268,
2560
+ "ӏ": 1269,
2561
+ "Ӕ": 1270,
2562
+ "ӕ": 1271,
2563
+ "Ә": 1272,
2564
+ "ә": 1273,
2565
+ "Ӡ": 1274,
2566
+ "ӡ": 1275,
2567
+ "Ө": 1276,
2568
+ "ө": 1277,
2569
+ "Ӷ": 1278,
2570
+ "ӷ": 1279,
2571
+ "Ӻ": 1280,
2572
+ "ӻ": 1281,
2573
+ "Ӽ": 1282,
2574
+ "ӽ": 1283,
2575
+ "Ӿ": 1284,
2576
+ "ӿ": 1285,
2577
+ "Ԁ": 1286,
2578
+ "ԁ": 1287,
2579
+ "Ԃ": 1288,
2580
+ "ԃ": 1289,
2581
+ "Ԅ": 1290,
2582
+ "ԅ": 1291,
2583
+ "Ԇ": 1292,
2584
+ "ԇ": 1293,
2585
+ "Ԉ": 1294,
2586
+ "ԉ": 1295,
2587
+ "Ԋ": 1296,
2588
+ "ԋ": 1297,
2589
+ "Ԍ": 1298,
2590
+ "ԍ": 1299,
2591
+ "Ԏ": 1300,
2592
+ "ԏ": 1301,
2593
+ "Ԑ": 1302,
2594
+ "ԑ": 1303,
2595
+ "Ԓ": 1304,
2596
+ "ԓ": 1305,
2597
+ "Ԕ": 1306,
2598
+ "ԕ": 1307,
2599
+ "Ԗ": 1308,
2600
+ "ԗ": 1309,
2601
+ "Ԙ": 1310,
2602
+ "ԙ": 1311,
2603
+ "Ԛ": 1312,
2604
+ "ԛ": 1313,
2605
+ "Ԝ": 1314,
2606
+ "ԝ": 1315,
2607
+ "Ԟ": 1316,
2608
+ "ԟ": 1317,
2609
+ "Ԡ": 1318,
2610
+ "ԡ": 1319,
2611
+ "Ԣ": 1320,
2612
+ "ԣ": 1321,
2613
+ "Ԥ": 1322,
2614
+ "ԥ": 1323,
2615
+ "Ԧ": 1324,
2616
+ "ԧ": 1325,
2617
+ "Ԩ": 1326,
2618
+ "ԩ": 1327,
2619
+ "Ԫ": 1328,
2620
+ "ԫ": 1329,
2621
+ "Ԭ": 1330,
2622
+ "ԭ": 1331,
2623
+ "Ԯ": 1332,
2624
+ "ԯ": 1333,
2625
+ "֑": 1334,
2626
+ "֒": 1335,
2627
+ "֓": 1336,
2628
+ "֔": 1337,
2629
+ "֕": 1338,
2630
+ "֖": 1339,
2631
+ "֗": 1340,
2632
+ "֘": 1341,
2633
+ "֙": 1342,
2634
+ "֚": 1343,
2635
+ "֛": 1344,
2636
+ "֜": 1345,
2637
+ "֝": 1346,
2638
+ "֞": 1347,
2639
+ "֟": 1348,
2640
+ "֠": 1349,
2641
+ "֡": 1350,
2642
+ "֢": 1351,
2643
+ "֣": 1352,
2644
+ "֤": 1353,
2645
+ "֥": 1354,
2646
+ "֦": 1355,
2647
+ "֧": 1356,
2648
+ "֨": 1357,
2649
+ "֩": 1358,
2650
+ "֪": 1359,
2651
+ "֫": 1360,
2652
+ "֬": 1361,
2653
+ "֭": 1362,
2654
+ "֮": 1363,
2655
+ "֯": 1364,
2656
+ "ְ": 1365,
2657
+ "ֱ": 1366,
2658
+ "ֲ": 1367,
2659
+ "ֳ": 1368,
2660
+ "ִ": 1369,
2661
+ "ֵ": 1370,
2662
+ "ֶ": 1371,
2663
+ "ַ": 1372,
2664
+ "ָ": 1373,
2665
+ "ֹ": 1374,
2666
+ "ֺ": 1375,
2667
+ "ֻ": 1376,
2668
+ "ּ": 1377,
2669
+ "ֽ": 1378,
2670
+ "־": 1379,
2671
+ "ֿ": 1380,
2672
+ "׀": 1381,
2673
+ "ׁ": 1382,
2674
+ "ׂ": 1383,
2675
+ "׃": 1384,
2676
+ "ׄ": 1385,
2677
+ "ׅ": 1386,
2678
+ "׆": 1387,
2679
+ "ׇ": 1388,
2680
+ "א": 1389,
2681
+ "ב": 1390,
2682
+ "ג": 1391,
2683
+ "ד": 1392,
2684
+ "ה": 1393,
2685
+ "ו": 1394,
2686
+ "ז": 1395,
2687
+ "ח": 1396,
2688
+ "ט": 1397,
2689
+ "י": 1398,
2690
+ "ך": 1399,
2691
+ "כ": 1400,
2692
+ "ל": 1401,
2693
+ "ם": 1402,
2694
+ "מ": 1403,
2695
+ "ן": 1404,
2696
+ "נ": 1405,
2697
+ "ס": 1406,
2698
+ "ע": 1407,
2699
+ "ף": 1408,
2700
+ "פ": 1409,
2701
+ "ץ": 1410,
2702
+ "צ": 1411,
2703
+ "ק": 1412,
2704
+ "ר": 1413,
2705
+ "ש": 1414,
2706
+ "ת": 1415,
2707
+ "ׯ": 1416,
2708
+ "װ": 1417,
2709
+ "ױ": 1418,
2710
+ "ײ": 1419,
2711
+ "׳": 1420,
2712
+ "״": 1421,
2713
+ "؀": 1422,
2714
+ "؁": 1423,
2715
+ "؂": 1424,
2716
+ "؃": 1425,
2717
+ "؄": 1426,
2718
+ "؅": 1427,
2719
+ "؆": 1428,
2720
+ "؇": 1429,
2721
+ "؈": 1430,
2722
+ "؉": 1431,
2723
+ "؊": 1432,
2724
+ "؋": 1433,
2725
+ "،": 1434,
2726
+ "؍": 1435,
2727
+ "؎": 1436,
2728
+ "؏": 1437,
2729
+ "ؐ": 1438,
2730
+ "ؑ": 1439,
2731
+ "ؒ": 1440,
2732
+ "ؓ": 1441,
2733
+ "ؔ": 1442,
2734
+ "ؕ": 1443,
2735
+ "ؖ": 1444,
2736
+ "ؗ": 1445,
2737
+ "ؘ": 1446,
2738
+ "ؙ": 1447,
2739
+ "ؚ": 1448,
2740
+ "؛": 1449,
2741
+ "؜": 1450,
2742
+ "؝": 1451,
2743
+ "؞": 1452,
2744
+ "؟": 1453,
2745
+ "ؠ": 1454,
2746
+ "ء": 1455,
2747
+ "ا": 1456,
2748
+ "ٓ": 1457,
2749
+ "ٔ": 1458,
2750
+ "و": 1459,
2751
+ "ٕ": 1460,
2752
+ "ي": 1461,
2753
+ "ب": 1462,
2754
+ "ة": 1463,
2755
+ "ت": 1464,
2756
+ "ث": 1465,
2757
+ "ج": 1466,
2758
+ "ح": 1467,
2759
+ "خ": 1468,
2760
+ "د": 1469,
2761
+ "ذ": 1470,
2762
+ "ر": 1471,
2763
+ "ز": 1472,
2764
+ "س": 1473,
2765
+ "ش": 1474,
2766
+ "ص": 1475,
2767
+ "ض": 1476,
2768
+ "ط": 1477,
2769
+ "ظ": 1478,
2770
+ "ع": 1479,
2771
+ "غ": 1480,
2772
+ "ػ": 1481,
2773
+ "ؼ": 1482,
2774
+ "ؽ": 1483,
2775
+ "ؾ": 1484,
2776
+ "ؿ": 1485,
2777
+ "ـ": 1486,
2778
+ "ف": 1487,
2779
+ "ق": 1488,
2780
+ "ك": 1489,
2781
+ "ل": 1490,
2782
+ "م": 1491,
2783
+ "ن": 1492,
2784
+ "ه": 1493,
2785
+ "ى": 1494,
2786
+ "ً": 1495,
2787
+ "ٌ": 1496,
2788
+ "ٍ": 1497,
2789
+ "َ": 1498,
2790
+ "ُ": 1499,
2791
+ "ِ": 1500,
2792
+ "ّ": 1501,
2793
+ "ْ": 1502,
2794
+ "ٖ": 1503,
2795
+ "ٗ": 1504,
2796
+ "٘": 1505,
2797
+ "ٙ": 1506,
2798
+ "ٚ": 1507,
2799
+ "ٛ": 1508,
2800
+ "ٜ": 1509,
2801
+ "ٝ": 1510,
2802
+ "ٞ": 1511,
2803
+ "ٟ": 1512,
2804
+ "٠": 1513,
2805
+ "١": 1514,
2806
+ "٢": 1515,
2807
+ "٣": 1516,
2808
+ "٤": 1517,
2809
+ "٥": 1518,
2810
+ "٦": 1519,
2811
+ "٧": 1520,
2812
+ "٨": 1521,
2813
+ "٩": 1522,
2814
+ "٪": 1523,
2815
+ "٫": 1524,
2816
+ "٬": 1525,
2817
+ "٭": 1526,
2818
+ "ٮ": 1527,
2819
+ "ٯ": 1528,
2820
+ "ٰ": 1529,
2821
+ "ٱ": 1530,
2822
+ "ٲ": 1531,
2823
+ "ٳ": 1532,
2824
+ "ٴ": 1533,
2825
+ "ۇ": 1534,
2826
+ "ٹ": 1535,
2827
+ "ٺ": 1536,
2828
+ "ٻ": 1537,
2829
+ "ټ": 1538,
2830
+ "ٽ": 1539,
2831
+ "پ": 1540,
2832
+ "ٿ": 1541,
2833
+ "ڀ": 1542,
2834
+ "ځ": 1543,
2835
+ "ڂ": 1544,
2836
+ "ڃ": 1545,
2837
+ "ڄ": 1546,
2838
+ "څ": 1547,
2839
+ "چ": 1548,
2840
+ "ڇ": 1549,
2841
+ "ڈ": 1550,
2842
+ "ډ": 1551,
2843
+ "ڊ": 1552,
2844
+ "ڋ": 1553,
2845
+ "ڌ": 1554,
2846
+ "ڍ": 1555,
2847
+ "ڎ": 1556,
2848
+ "ڏ": 1557,
2849
+ "ڐ": 1558,
2850
+ "ڑ": 1559,
2851
+ "ڒ": 1560,
2852
+ "ړ": 1561,
2853
+ "ڔ": 1562,
2854
+ "ڕ": 1563,
2855
+ "ږ": 1564,
2856
+ "ڗ": 1565,
2857
+ "ژ": 1566,
2858
+ "ڙ": 1567,
2859
+ "ښ": 1568,
2860
+ "ڛ": 1569,
2861
+ "ڜ": 1570,
2862
+ "ڝ": 1571,
2863
+ "ڞ": 1572,
2864
+ "ڟ": 1573,
2865
+ "ڠ": 1574,
2866
+ "ڡ": 1575,
2867
+ "ڢ": 1576,
2868
+ "ڣ": 1577,
2869
+ "ڤ": 1578,
2870
+ "ڥ": 1579,
2871
+ "ڦ": 1580,
2872
+ "ڧ": 1581,
2873
+ "ڨ": 1582,
2874
+ "ک": 1583,
2875
+ "ڪ": 1584,
2876
+ "ګ": 1585,
2877
+ "ڬ": 1586,
2878
+ "ڭ": 1587,
2879
+ "ڮ": 1588,
2880
+ "گ": 1589,
2881
+ "ڰ": 1590,
2882
+ "ڱ": 1591,
2883
+ "ڲ": 1592,
2884
+ "ڳ": 1593,
2885
+ "ڴ": 1594,
2886
+ "ڵ": 1595,
2887
+ "ڶ": 1596,
2888
+ "ڷ": 1597,
2889
+ "ڸ": 1598,
2890
+ "ڹ": 1599,
2891
+ "ں": 1600,
2892
+ "ڻ": 1601,
2893
+ "ڼ": 1602,
2894
+ "ڽ": 1603,
2895
+ "ھ": 1604,
2896
+ "ڿ": 1605,
2897
+ "ە": 1606,
2898
+ "ہ": 1607,
2899
+ "ۃ": 1608,
2900
+ "ۄ": 1609,
2901
+ "ۅ": 1610,
2902
+ "ۆ": 1611,
2903
+ "ۈ": 1612,
2904
+ "ۉ": 1613,
2905
+ "ۊ": 1614,
2906
+ "ۋ": 1615,
2907
+ "ی": 1616,
2908
+ "ۍ": 1617,
2909
+ "ێ": 1618,
2910
+ "ۏ": 1619,
2911
+ "ې": 1620,
2912
+ "ۑ": 1621,
2913
+ "ے": 1622,
2914
+ "۔": 1623,
2915
+ "ۖ": 1624,
2916
+ "ۗ": 1625,
2917
+ "ۘ": 1626,
2918
+ "ۙ": 1627,
2919
+ "ۚ": 1628,
2920
+ "ۛ": 1629,
2921
+ "ۜ": 1630,
2922
+ "۝": 1631,
2923
+ "۞": 1632,
2924
+ "۟": 1633,
2925
+ "۠": 1634,
2926
+ "ۡ": 1635,
2927
+ "ۢ": 1636,
2928
+ "ۣ": 1637,
2929
+ "ۤ": 1638,
2930
+ "ۥ": 1639,
2931
+ "ۦ": 1640,
2932
+ "ۧ": 1641,
2933
+ "ۨ": 1642,
2934
+ "۩": 1643,
2935
+ "۪": 1644,
2936
+ "۫": 1645,
2937
+ "۬": 1646,
2938
+ "ۭ": 1647,
2939
+ "ۮ": 1648,
2940
+ "ۯ": 1649,
2941
+ "۰": 1650,
2942
+ "۱": 1651,
2943
+ "۲": 1652,
2944
+ "۳": 1653,
2945
+ "۴": 1654,
2946
+ "۵": 1655,
2947
+ "۶": 1656,
2948
+ "۷": 1657,
2949
+ "۸": 1658,
2950
+ "۹": 1659,
2951
+ "ۺ": 1660,
2952
+ "ۻ": 1661,
2953
+ "ۼ": 1662,
2954
+ "۽": 1663,
2955
+ "۾": 1664,
2956
+ "ۿ": 1665,
2957
+ "ऀ": 1666,
2958
+ "ँ": 1667,
2959
+ "ं": 1668,
2960
+ "ः": 1669,
2961
+ "ऄ": 1670,
2962
+ "अ": 1671,
2963
+ "आ": 1672,
2964
+ "इ": 1673,
2965
+ "ई": 1674,
2966
+ "उ": 1675,
2967
+ "ऊ": 1676,
2968
+ "ऋ": 1677,
2969
+ "ऌ": 1678,
2970
+ "ऍ": 1679,
2971
+ "ऎ": 1680,
2972
+ "ए": 1681,
2973
+ "ऐ": 1682,
2974
+ "ऑ": 1683,
2975
+ "ऒ": 1684,
2976
+ "ओ": 1685,
2977
+ "औ": 1686,
2978
+ "क": 1687,
2979
+ "ख": 1688,
2980
+ "ग": 1689,
2981
+ "घ": 1690,
2982
+ "ङ": 1691,
2983
+ "च": 1692,
2984
+ "छ": 1693,
2985
+ "ज": 1694,
2986
+ "झ": 1695,
2987
+ "ञ": 1696,
2988
+ "ट": 1697,
2989
+ "ठ": 1698,
2990
+ "ड": 1699,
2991
+ "ढ": 1700,
2992
+ "ण": 1701,
2993
+ "त": 1702,
2994
+ "थ": 1703,
2995
+ "द": 1704,
2996
+ "ध": 1705,
2997
+ "न": 1706,
2998
+ "़": 1707,
2999
+ "प": 1708,
3000
+ "फ": 1709,
3001
+ "ब": 1710,
3002
+ "भ": 1711,
3003
+ "म": 1712,
3004
+ "य": 1713,
3005
+ "र": 1714,
3006
+ "ल": 1715,
3007
+ "ळ": 1716,
3008
+ "व": 1717,
3009
+ "श": 1718,
3010
+ "ष": 1719,
3011
+ "स": 1720,
3012
+ "ह": 1721,
3013
+ "ऺ": 1722,
3014
+ "ऻ": 1723,
3015
+ "ऽ": 1724,
3016
+ "ा": 1725,
3017
+ "ि": 1726,
3018
+ "ी": 1727,
3019
+ "ु": 1728,
3020
+ "ू": 1729,
3021
+ "ृ": 1730,
3022
+ "ॄ": 1731,
3023
+ "ॅ": 1732,
3024
+ "ॆ": 1733,
3025
+ "े": 1734,
3026
+ "ै": 1735,
3027
+ "ॉ": 1736,
3028
+ "ॊ": 1737,
3029
+ "ो": 1738,
3030
+ "ौ": 1739,
3031
+ "्": 1740,
3032
+ "ॎ": 1741,
3033
+ "ॏ": 1742,
3034
+ "ॐ": 1743,
3035
+ "॑": 1744,
3036
+ "॒": 1745,
3037
+ "॓": 1746,
3038
+ "॔": 1747,
3039
+ "ॕ": 1748,
3040
+ "ॖ": 1749,
3041
+ "ॗ": 1750,
3042
+ "ॠ": 1751,
3043
+ "ॡ": 1752,
3044
+ "ॢ": 1753,
3045
+ "ॣ": 1754,
3046
+ "।": 1755,
3047
+ "॥": 1756,
3048
+ "०": 1757,
3049
+ "१": 1758,
3050
+ "२": 1759,
3051
+ "३": 1760,
3052
+ "४": 1761,
3053
+ "५": 1762,
3054
+ "६": 1763,
3055
+ "७": 1764,
3056
+ "८": 1765,
3057
+ "९": 1766,
3058
+ "॰": 1767,
3059
+ "ॱ": 1768,
3060
+ "ॲ": 1769,
3061
+ "ॳ": 1770,
3062
+ "ॴ": 1771,
3063
+ "ॵ": 1772,
3064
+ "ॶ": 1773,
3065
+ "ॷ": 1774,
3066
+ "ॸ": 1775,
3067
+ "ॹ": 1776,
3068
+ "ॺ": 1777,
3069
+ "ॻ": 1778,
3070
+ "ॼ": 1779,
3071
+ "ॽ": 1780,
3072
+ "ॾ": 1781,
3073
+ "ॿ": 1782,
3074
+ "ᄀ": 1783,
3075
+ "ᄁ": 1784,
3076
+ "ᄂ": 1785,
3077
+ "ᄃ": 1786,
3078
+ "ᄄ": 1787,
3079
+ "ᄅ": 1788,
3080
+ "ᄆ": 1789,
3081
+ "ᄇ": 1790,
3082
+ "ᄈ": 1791,
3083
+ "ᄉ": 1792,
3084
+ "ᄊ": 1793,
3085
+ "ᄋ": 1794,
3086
+ "ᄌ": 1795,
3087
+ "ᄍ": 1796,
3088
+ "ᄎ": 1797,
3089
+ "ᄏ": 1798,
3090
+ "ᄐ": 1799,
3091
+ "ᄑ": 1800,
3092
+ "ᄒ": 1801,
3093
+ "ᄓ": 1802,
3094
+ "ᄔ": 1803,
3095
+ "ᄕ": 1804,
3096
+ "ᄖ": 1805,
3097
+ "ᄗ": 1806,
3098
+ "ᄘ": 1807,
3099
+ "ᄙ": 1808,
3100
+ "ᄚ": 1809,
3101
+ "ᄛ": 1810,
3102
+ "ᄜ": 1811,
3103
+ "ᄝ": 1812,
3104
+ "ᄞ": 1813,
3105
+ "ᄟ": 1814,
3106
+ "ᄠ": 1815,
3107
+ "ᄡ": 1816,
3108
+ "ᄢ": 1817,
3109
+ "ᄣ": 1818,
3110
+ "ᄤ": 1819,
3111
+ "ᄥ": 1820,
3112
+ "ᄦ": 1821,
3113
+ "ᄧ": 1822,
3114
+ "ᄨ": 1823,
3115
+ "ᄩ": 1824,
3116
+ "ᄪ": 1825,
3117
+ "ᄫ": 1826,
3118
+ "ᄬ": 1827,
3119
+ "ᄭ": 1828,
3120
+ "ᄮ": 1829,
3121
+ "ᄯ": 1830,
3122
+ "ᄰ": 1831,
3123
+ "ᄱ": 1832,
3124
+ "ᄲ": 1833,
3125
+ "ᄳ": 1834,
3126
+ "ᄴ": 1835,
3127
+ "ᄵ": 1836,
3128
+ "ᄶ": 1837,
3129
+ "ᄷ": 1838,
3130
+ "ᄸ": 1839,
3131
+ "ᄹ": 1840,
3132
+ "ᄺ": 1841,
3133
+ "ᄻ": 1842,
3134
+ "ᄼ": 1843,
3135
+ "ᄽ": 1844,
3136
+ "ᄾ": 1845,
3137
+ "ᄿ": 1846,
3138
+ "ᅀ": 1847,
3139
+ "ᅁ": 1848,
3140
+ "ᅂ": 1849,
3141
+ "ᅃ": 1850,
3142
+ "ᅄ": 1851,
3143
+ "ᅅ": 1852,
3144
+ "ᅆ": 1853,
3145
+ "ᅇ": 1854,
3146
+ "ᅈ": 1855,
3147
+ "ᅉ": 1856,
3148
+ "ᅊ": 1857,
3149
+ "ᅋ": 1858,
3150
+ "ᅌ": 1859,
3151
+ "ᅍ": 1860,
3152
+ "ᅎ": 1861,
3153
+ "ᅏ": 1862,
3154
+ "ᅐ": 1863,
3155
+ "ᅑ": 1864,
3156
+ "ᅒ": 1865,
3157
+ "ᅓ": 1866,
3158
+ "ᅔ": 1867,
3159
+ "ᅕ": 1868,
3160
+ "ᅖ": 1869,
3161
+ "ᅗ": 1870,
3162
+ "ᅘ": 1871,
3163
+ "ᅙ": 1872,
3164
+ "ᅚ": 1873,
3165
+ "ᅛ": 1874,
3166
+ "ᅜ": 1875,
3167
+ "ᅝ": 1876,
3168
+ "ᅞ": 1877,
3169
+ "ᅟ": 1878,
3170
+ "ᅠ": 1879,
3171
+ "ᅡ": 1880,
3172
+ "ᅢ": 1881,
3173
+ "ᅣ": 1882,
3174
+ "ᅤ": 1883,
3175
+ "ᅥ": 1884,
3176
+ "ᅦ": 1885,
3177
+ "ᅧ": 1886,
3178
+ "ᅨ": 1887,
3179
+ "ᅩ": 1888,
3180
+ "ᅪ": 1889,
3181
+ "ᅫ": 1890,
3182
+ "ᅬ": 1891,
3183
+ "ᅭ": 1892,
3184
+ "ᅮ": 1893,
3185
+ "ᅯ": 1894,
3186
+ "ᅰ": 1895,
3187
+ "ᅱ": 1896,
3188
+ "ᅲ": 1897,
3189
+ "ᅳ": 1898,
3190
+ "ᅴ": 1899,
3191
+ "ᅵ": 1900,
3192
+ "ᅶ": 1901,
3193
+ "ᅷ": 1902,
3194
+ "ᅸ": 1903,
3195
+ "ᅹ": 1904,
3196
+ "ᅺ": 1905,
3197
+ "ᅻ": 1906,
3198
+ "ᅼ": 1907,
3199
+ "ᅽ": 1908,
3200
+ "ᅾ": 1909,
3201
+ "ᅿ": 1910,
3202
+ "ᆀ": 1911,
3203
+ "ᆁ": 1912,
3204
+ "ᆂ": 1913,
3205
+ "ᆃ": 1914,
3206
+ "ᆄ": 1915,
3207
+ "ᆅ": 1916,
3208
+ "ᆆ": 1917,
3209
+ "ᆇ": 1918,
3210
+ "ᆈ": 1919,
3211
+ "ᆉ": 1920,
3212
+ "ᆊ": 1921,
3213
+ "ᆋ": 1922,
3214
+ "ᆌ": 1923,
3215
+ "ᆍ": 1924,
3216
+ "ᆎ": 1925,
3217
+ "ᆏ": 1926,
3218
+ "ᆐ": 1927,
3219
+ "ᆑ": 1928,
3220
+ "ᆒ": 1929,
3221
+ "ᆓ": 1930,
3222
+ "ᆔ": 1931,
3223
+ "ᆕ": 1932,
3224
+ "ᆖ": 1933,
3225
+ "ᆗ": 1934,
3226
+ "ᆘ": 1935,
3227
+ "ᆙ": 1936,
3228
+ "ᆚ": 1937,
3229
+ "ᆛ": 1938,
3230
+ "ᆜ": 1939,
3231
+ "ᆝ": 1940,
3232
+ "ᆞ": 1941,
3233
+ "ᆟ": 1942,
3234
+ "ᆠ": 1943,
3235
+ "ᆡ": 1944,
3236
+ "ᆢ": 1945,
3237
+ "ᆣ": 1946,
3238
+ "ᆤ": 1947,
3239
+ "ᆥ": 1948,
3240
+ "ᆦ": 1949,
3241
+ "ᆧ": 1950,
3242
+ "ᆨ": 1951,
3243
+ "ᆩ": 1952,
3244
+ "ᆪ": 1953,
3245
+ "ᆫ": 1954,
3246
+ "ᆬ": 1955,
3247
+ "ᆭ": 1956,
3248
+ "ᆮ": 1957,
3249
+ "ᆯ": 1958,
3250
+ "ᆰ": 1959,
3251
+ "ᆱ": 1960,
3252
+ "ᆲ": 1961,
3253
+ "ᆳ": 1962,
3254
+ "ᆴ": 1963,
3255
+ "ᆵ": 1964,
3256
+ "ᆶ": 1965,
3257
+ "ᆷ": 1966,
3258
+ "ᆸ": 1967,
3259
+ "ᆹ": 1968,
3260
+ "ᆺ": 1969,
3261
+ "ᆻ": 1970,
3262
+ "ᆼ": 1971,
3263
+ "ᆽ": 1972,
3264
+ "ᆾ": 1973,
3265
+ "ᆿ": 1974,
3266
+ "ᇀ": 1975,
3267
+ "ᇁ": 1976,
3268
+ "ᇂ": 1977,
3269
+ "ᇃ": 1978,
3270
+ "ᇄ": 1979,
3271
+ "ᇅ": 1980,
3272
+ "ᇆ": 1981,
3273
+ "ᇇ": 1982,
3274
+ "ᇈ": 1983,
3275
+ "ᇉ": 1984,
3276
+ "ᇊ": 1985,
3277
+ "ᇋ": 1986,
3278
+ "ᇌ": 1987,
3279
+ "ᇍ": 1988,
3280
+ "ᇎ": 1989,
3281
+ "ᇏ": 1990,
3282
+ "ᇐ": 1991,
3283
+ "ᇑ": 1992,
3284
+ "ᇒ": 1993,
3285
+ "ᇓ": 1994,
3286
+ "ᇔ": 1995,
3287
+ "ᇕ": 1996,
3288
+ "ᇖ": 1997,
3289
+ "ᇗ": 1998,
3290
+ "ᇘ": 1999,
3291
+ "ᇙ": 2000,
3292
+ "ᇚ": 2001,
3293
+ "ᇛ": 2002,
3294
+ "ᇜ": 2003,
3295
+ "ᇝ": 2004,
3296
+ "ᇞ": 2005,
3297
+ "ᇟ": 2006,
3298
+ "ᇠ": 2007,
3299
+ "ᇡ": 2008,
3300
+ "ᇢ": 2009,
3301
+ "ᇣ": 2010,
3302
+ "ᇤ": 2011,
3303
+ "ᇥ": 2012,
3304
+ "ᇦ": 2013,
3305
+ "ᇧ": 2014,
3306
+ "ᇨ": 2015,
3307
+ "ᇩ": 2016,
3308
+ "ᇪ": 2017,
3309
+ "ᇫ": 2018,
3310
+ "ᇬ": 2019,
3311
+ "ᇭ": 2020,
3312
+ "ᇮ": 2021,
3313
+ "ᇯ": 2022,
3314
+ "ᇰ": 2023,
3315
+ "ᇱ": 2024,
3316
+ "ᇲ": 2025,
3317
+ "ᇳ": 2026,
3318
+ "ᇴ": 2027,
3319
+ "ᇵ": 2028,
3320
+ "ᇶ": 2029,
3321
+ "ᇷ": 2030,
3322
+ "ᇸ": 2031,
3323
+ "ᇹ": 2032,
3324
+ "ᇺ": 2033,
3325
+ "ᇻ": 2034,
3326
+ "ᇼ": 2035,
3327
+ "ᇽ": 2036,
3328
+ "ᇾ": 2037,
3329
+ "ᇿ": 2038,
3330
+ "ẜ": 2039,
3331
+ "ẝ": 2040,
3332
+ "ẞ": 2041,
3333
+ "ẟ": 2042,
3334
+ "Ỻ": 2043,
3335
+ "ỻ": 2044,
3336
+ "Ỽ": 2045,
3337
+ "ỽ": 2046,
3338
+ "Ỿ": 2047,
3339
+ "ỿ": 2048,
3340
+ "​": 2049,
3341
+ "‌": 2050,
3342
+ "‍": 2051,
3343
+ "[cj_3]": 2052,
3344
+ "[cj_4]": 2053,
3345
+ "†": 2054,
3346
+ "‡": 2055,
3347
+ "•": 2056,
3348
+ "‣": 2057,
3349
+ "‧": 2058,
3350
+ "[cj_5]": 2059,
3351
+ "[cj_6]": 2060,
3352
+ "[cj_7]": 2061,
3353
+ "[cj_8]": 2062,
3354
+ "[cj_9]": 2063,
3355
+ "[cj_.]": 2064,
3356
+ "[PLACEHOLDER45]": 2065,
3357
+ "‰": 2066,
3358
+ "‱": 2067,
3359
+ "′": 2068,
3360
+ "‵": 2069,
3361
+ "‸": 2070,
3362
+ "‹": 2071,
3363
+ "›": 2072,
3364
+ "※": 2073,
3365
+ "‽": 2074,
3366
+ "‿": 2075,
3367
+ "⁀": 2076,
3368
+ "⁁": 2077,
3369
+ "⁂": 2078,
3370
+ "⁃": 2079,
3371
+ "⁅": 2080,
3372
+ "⁆": 2081,
3373
+ "⁊": 2082,
3374
+ "⁋": 2083,
3375
+ "⁌": 2084,
3376
+ "⁍": 2085,
3377
+ "⁎": 2086,
3378
+ "⁏": 2087,
3379
+ "⁐": 2088,
3380
+ "⁑": 2089,
3381
+ "⁒": 2090,
3382
+ "⁓": 2091,
3383
+ "⁔": 2092,
3384
+ "⁕": 2093,
3385
+ "⁖": 2094,
3386
+ "⁘": 2095,
3387
+ "⁙": 2096,
3388
+ "⁚": 2097,
3389
+ "⁛": 2098,
3390
+ "⁜": 2099,
3391
+ "⁝": 2100,
3392
+ "⁞": 2101,
3393
+ "⁠": 2102,
3394
+ "⁡": 2103,
3395
+ "⁢": 2104,
3396
+ "⁣": 2105,
3397
+ "⁤": 2106,
3398
+ "[fi]": 2107,
3399
+ "[ta]": 2108,
3400
+ "[ms]": 2109,
3401
+ "[he]": 2110,
3402
+ "": 2111,
3403
+ "": 2112,
3404
+ "": 2113,
3405
+ "": 2114,
3406
+ "": 2115,
3407
+ "": 2116,
3408
+ "−": 2117,
3409
+ "、": 2118,
3410
+ "。": 2119,
3411
+ "〃": 2120,
3412
+ "〄": 2121,
3413
+ "々": 2122,
3414
+ "〆": 2123,
3415
+ "〇": 2124,
3416
+ "〈": 2125,
3417
+ "〉": 2126,
3418
+ "《": 2127,
3419
+ "》": 2128,
3420
+ "「": 2129,
3421
+ "」": 2130,
3422
+ "『": 2131,
3423
+ "』": 2132,
3424
+ "【": 2133,
3425
+ "】": 2134,
3426
+ "〒": 2135,
3427
+ "〓": 2136,
3428
+ "〔": 2137,
3429
+ "〕": 2138,
3430
+ "〖": 2139,
3431
+ "〗": 2140,
3432
+ "〘": 2141,
3433
+ "〙": 2142,
3434
+ "〚": 2143,
3435
+ "〛": 2144,
3436
+ "〜": 2145,
3437
+ "〝": 2146,
3438
+ "〞": 2147,
3439
+ "〟": 2148,
3440
+ "ぁ": 2149,
3441
+ "あ": 2150,
3442
+ "ぃ": 2151,
3443
+ "い": 2152,
3444
+ "ぅ": 2153,
3445
+ "う": 2154,
3446
+ "ぇ": 2155,
3447
+ "え": 2156,
3448
+ "ぉ": 2157,
3449
+ "お": 2158,
3450
+ "か": 2159,
3451
+ "゙": 2160,
3452
+ "き": 2161,
3453
+ "く": 2162,
3454
+ "け": 2163,
3455
+ "こ": 2164,
3456
+ "さ": 2165,
3457
+ "し": 2166,
3458
+ "す": 2167,
3459
+ "せ": 2168,
3460
+ "そ": 2169,
3461
+ "た": 2170,
3462
+ "ち": 2171,
3463
+ "っ": 2172,
3464
+ "つ": 2173,
3465
+ "て": 2174,
3466
+ "と": 2175,
3467
+ "な": 2176,
3468
+ "に": 2177,
3469
+ "ぬ": 2178,
3470
+ "ね": 2179,
3471
+ "の": 2180,
3472
+ "は": 2181,
3473
+ "゚": 2182,
3474
+ "ひ": 2183,
3475
+ "ふ": 2184,
3476
+ "へ": 2185,
3477
+ "ほ": 2186,
3478
+ "ま": 2187,
3479
+ "み": 2188,
3480
+ "む": 2189,
3481
+ "め": 2190,
3482
+ "も": 2191,
3483
+ "ゃ": 2192,
3484
+ "や": 2193,
3485
+ "ゅ": 2194,
3486
+ "ゆ": 2195,
3487
+ "ょ": 2196,
3488
+ "よ": 2197,
3489
+ "ら": 2198,
3490
+ "り": 2199,
3491
+ "る": 2200,
3492
+ "れ": 2201,
3493
+ "ろ": 2202,
3494
+ "ゎ": 2203,
3495
+ "わ": 2204,
3496
+ "ゐ": 2205,
3497
+ "ゑ": 2206,
3498
+ "を": 2207,
3499
+ "ん": 2208,
3500
+ "ゕ": 2209,
3501
+ "ゖ": 2210,
3502
+ "ゝ": 2211,
3503
+ "゠": 2212,
3504
+ "ァ": 2213,
3505
+ "ア": 2214,
3506
+ "ィ": 2215,
3507
+ "イ": 2216,
3508
+ "ゥ": 2217,
3509
+ "ウ": 2218,
3510
+ "ェ": 2219,
3511
+ "エ": 2220,
3512
+ "ォ": 2221,
3513
+ "オ": 2222,
3514
+ "カ": 2223,
3515
+ "キ": 2224,
3516
+ "ク": 2225,
3517
+ "ケ": 2226,
3518
+ "コ": 2227,
3519
+ "サ": 2228,
3520
+ "シ": 2229,
3521
+ "ス": 2230,
3522
+ "セ": 2231,
3523
+ "ソ": 2232,
3524
+ "タ": 2233,
3525
+ "チ": 2234,
3526
+ "ッ": 2235,
3527
+ "ツ": 2236,
3528
+ "テ": 2237,
3529
+ "ト": 2238,
3530
+ "ナ": 2239,
3531
+ "ニ": 2240,
3532
+ "ヌ": 2241,
3533
+ "ネ": 2242,
3534
+ "ノ": 2243,
3535
+ "ハ": 2244,
3536
+ "ヒ": 2245,
3537
+ "フ": 2246,
3538
+ "ヘ": 2247,
3539
+ "ホ": 2248,
3540
+ "マ": 2249,
3541
+ "ミ": 2250,
3542
+ "ム": 2251,
3543
+ "メ": 2252,
3544
+ "モ": 2253,
3545
+ "ャ": 2254,
3546
+ "ヤ": 2255,
3547
+ "ュ": 2256,
3548
+ "ユ": 2257,
3549
+ "ョ": 2258,
3550
+ "ヨ": 2259,
3551
+ "ラ": 2260,
3552
+ "リ": 2261,
3553
+ "ル": 2262,
3554
+ "レ": 2263,
3555
+ "ロ": 2264,
3556
+ "ヮ": 2265,
3557
+ "ワ": 2266,
3558
+ "ヰ": 2267,
3559
+ "ヱ": 2268,
3560
+ "ヲ": 2269,
3561
+ "ン": 2270,
3562
+ "ヵ": 2271,
3563
+ "ヶ": 2272,
3564
+ "・": 2273,
3565
+ "ー": 2274,
3566
+ "ヽ": 2275,
3567
+ "ㄅ": 2276,
3568
+ "ㄆ": 2277,
3569
+ "ㄇ": 2278,
3570
+ "ㄈ": 2279,
3571
+ "ㄉ": 2280,
3572
+ "ㄊ": 2281,
3573
+ "ㄋ": 2282,
3574
+ "ㄌ": 2283,
3575
+ "ㄍ": 2284,
3576
+ "ㄎ": 2285,
3577
+ "ㄏ": 2286,
3578
+ "ㄐ": 2287,
3579
+ "ㄑ": 2288,
3580
+ "ㄒ": 2289,
3581
+ "ㄓ": 2290,
3582
+ "ㄔ": 2291,
3583
+ "ㄕ": 2292,
3584
+ "ㄖ": 2293,
3585
+ "ㄗ": 2294,
3586
+ "ㄘ": 2295,
3587
+ "ㄙ": 2296,
3588
+ "ㄚ": 2297,
3589
+ "ㄛ": 2298,
3590
+ "ㄜ": 2299,
3591
+ "ㄝ": 2300,
3592
+ "ㄞ": 2301,
3593
+ "ㄟ": 2302,
3594
+ "ㄠ": 2303,
3595
+ "ㄡ": 2304,
3596
+ "ㄢ": 2305,
3597
+ "ㄣ": 2306,
3598
+ "ㄤ": 2307,
3599
+ "ㄥ": 2308,
3600
+ "ㄦ": 2309,
3601
+ "ㄧ": 2310,
3602
+ "ㄨ": 2311,
3603
+ "ㄩ": 2312,
3604
+ "ㄪ": 2313,
3605
+ "ㄫ": 2314,
3606
+ "ㄬ": 2315,
3607
+ "ㄭ": 2316,
3608
+ "ㄮ": 2317,
3609
+ "ㄯ": 2318,
3610
+ "ph": 2319,
3611
+ "rr": 2320,
3612
+ "gn": 2321,
3613
+ "ng": 2322,
3614
+ "dt": 2323,
3615
+ "sj": 2324,
3616
+ "uw": 2325,
3617
+ "ff": 2326,
3618
+ "gg": 2327,
3619
+ "kk": 2328,
3620
+ "nn": 2329,
3621
+ "mm": 2330,
3622
+ "ss": 2331,
3623
+ "tt": 2332,
3624
+ "zz": 2333,
3625
+ "ds": 2334,
3626
+ "ai": 2335,
3627
+ "au": 2336,
3628
+ "ea": 2337,
3629
+ "ee": 2338,
3630
+ "ei": 2339,
3631
+ "eu": 2340,
3632
+ "ew": 2341,
3633
+ "ij": 2342,
3634
+ "oa": 2343,
3635
+ "oi": 2344,
3636
+ "oo": 2345,
3637
+ "ue": 2346,
3638
+ "eau": 2347,
3639
+ "tch": 2348,
3640
+ "sch": 2349,
3641
+ "🙊": 2350,
3642
+ "🤭": 2351,
3643
+ "€": 2352,
3644
+ "أ": 2353,
3645
+ "إ": 2354,
3646
+ "ئ": 2355,
3647
+ "آ": 2356,
3648
+ "ؤ": 2357,
3649
+ "ﻻ": 2358,
3650
+ "ﺃ": 2359,
3651
+ "ę": 2360,
3652
+ "ą": 2361,
3653
+ "ż": 2362,
3654
+ "ś": 2363,
3655
+ "ć": 2364,
3656
+ "ń": 2365,
3657
+ "ź": 2366,
3658
+ "Ś": 2367,
3659
+ "Ź": 2368,
3660
+ "Ż": 2369,
3661
+ "Ć": 2370,
3662
+ "Š": 2371,
3663
+ "Ő": 2372,
3664
+ "й": 2373,
3665
+ "ё": 2374,
3666
+ "Й": 2375,
3667
+ "Ё": 2376,
3668
+ "が": 2377,
3669
+ "で": 2378,
3670
+ "じ": 2379,
3671
+ "だ": 2380,
3672
+ "ど": 2381,
3673
+ "ば": 2382,
3674
+ "げ": 2383,
3675
+ "ご": 2384,
3676
+ "ぶ": 2385,
3677
+ "ぎ": 2386,
3678
+ ",": 2387,
3679
+ "(": 2388,
3680
+ ":": 2389,
3681
+ ";": 2390,
3682
+ "?": 2391,
3683
+ "!": 2392,
3684
+ "#": 2393,
3685
+ " )": 2394,
3686
+ "ά": 2395,
3687
+ "ό": 2396,
3688
+ "ί": 2397,
3689
+ "έ": 2398,
3690
+ "ή": 2399,
3691
+ "ύ": 2400,
3692
+ "ώ": 2401,
3693
+ "Έ": 2402,
3694
+ "Ό": 2403,
3695
+ "Ή": 2404,
3696
+ "ž": 2405,
3697
+ "š": 2406,
3698
+ "ū": 2407,
3699
+ "ş": 2408,
3700
+ "Ō": 2409,
3701
+ "ī": 2410,
3702
+ "č": 2411,
3703
+ "ř": 2412,
3704
+ "ă": 2413,
3705
+ "이": 2414,
3706
+ "기": 2415,
3707
+ "요": 2416,
3708
+ "에": 2417,
3709
+ "다": 2418,
3710
+ "을": 2419,
3711
+ "은": 2420,
3712
+ "서": 2421,
3713
+ "니": 2422,
3714
+ "어": 2423,
3715
+ "ě": 2424,
3716
+ "ů": 2425,
3717
+ "Č": 2426,
3718
+ "ň": 2427,
3719
+ "ď": 2428,
3720
+ "ť": 2429,
3721
+ "♭": 2430,
3722
+ "ľ": 2431,
3723
+ "ĺ": 2432,
3724
+ "ğ": 2433,
3725
+ "İ": 2434,
3726
+ "Ş": 2435,
3727
+ "ड़": 2436,
3728
+ "ढ़": 2437,
3729
+ "ज़": 2438,
3730
+ "फ़": 2439,
3731
+ "ख़": 2440,
3732
+ "क़": 2441,
3733
+ "ग़": 2442,
3734
+ "Ά": 2443,
3735
+ "ϊ": 2444,
3736
+ "Ί": 2445,
3737
+ "Ύ": 2446,
3738
+ "Ώ": 2447,
3739
+ "ΐ": 2448,
3740
+ "ϋ": 2449,
3741
+ "ũ": 2450,
3742
+ "ụ": 2451,
3743
+ "ọ": 2452,
3744
+ "ạ": 2453
3745
+ },
3746
+ "merges": [
3747
+ "t h",
3748
+ "i n",
3749
+ "th e",
3750
+ "a n",
3751
+ "e r",
3752
+ "o u",
3753
+ "r e",
3754
+ "o n",
3755
+ "a t",
3756
+ "e d",
3757
+ "e n",
3758
+ "t o",
3759
+ "in g",
3760
+ "an d",
3761
+ "i s",
3762
+ "a s",
3763
+ "a l",
3764
+ "o r",
3765
+ "o f",
3766
+ "a r",
3767
+ "i t",
3768
+ "e s",
3769
+ "h e",
3770
+ "s t",
3771
+ "l e",
3772
+ "o m",
3773
+ "s e",
3774
+ "b e",
3775
+ "a d",
3776
+ "o w",
3777
+ "l y",
3778
+ "c h",
3779
+ "w h",
3780
+ "th at",
3781
+ "y ou",
3782
+ "l i",
3783
+ "v e",
3784
+ "a c",
3785
+ "t i",
3786
+ "l d",
3787
+ "m e",
3788
+ "w as",
3789
+ "g h",
3790
+ "i d",
3791
+ "l l",
3792
+ "w i",
3793
+ "en t",
3794
+ "f or",
3795
+ "a y",
3796
+ "r o",
3797
+ "v er",
3798
+ "i c",
3799
+ "h er",
3800
+ "k e",
3801
+ "h is",
3802
+ "n o",
3803
+ "u t",
3804
+ "u n",
3805
+ "i r",
3806
+ "l o",
3807
+ "w e",
3808
+ "r i",
3809
+ "h a",
3810
+ "wi th",
3811
+ "gh t",
3812
+ "ou t",
3813
+ "i m",
3814
+ "i on",
3815
+ "al l",
3816
+ "a b",
3817
+ "on e",
3818
+ "n e",
3819
+ "g e",
3820
+ "ou ld",
3821
+ "t er",
3822
+ "m o",
3823
+ "h ad",
3824
+ "c e",
3825
+ "s he",
3826
+ "g o",
3827
+ "s h",
3828
+ "u r",
3829
+ "a m",
3830
+ "s o",
3831
+ "p e",
3832
+ "m y",
3833
+ "d e",
3834
+ "a re",
3835
+ "b ut",
3836
+ "om e",
3837
+ "f r",
3838
+ "the r",
3839
+ "f e",
3840
+ "s u",
3841
+ "d o",
3842
+ "c on",
3843
+ "t e",
3844
+ "a in",
3845
+ "er e",
3846
+ "p o",
3847
+ "i f",
3848
+ "the y",
3849
+ "u s",
3850
+ "a g",
3851
+ "t r",
3852
+ "n ow",
3853
+ "ou n",
3854
+ "th is",
3855
+ "ha ve",
3856
+ "no t",
3857
+ "s a",
3858
+ "i l",
3859
+ "u p",
3860
+ "th ing",
3861
+ "fr om",
3862
+ "a p",
3863
+ "h im",
3864
+ "ac k",
3865
+ "at ion",
3866
+ "an t",
3867
+ "ou r",
3868
+ "o p",
3869
+ "li ke",
3870
+ "u st",
3871
+ "es s",
3872
+ "b o",
3873
+ "o k",
3874
+ "u l",
3875
+ "in d",
3876
+ "e x",
3877
+ "c om",
3878
+ "s ome",
3879
+ "the re",
3880
+ "er s",
3881
+ "c o",
3882
+ "re s",
3883
+ "m an",
3884
+ "ar d",
3885
+ "p l",
3886
+ "w or",
3887
+ "w ay",
3888
+ "ti on",
3889
+ "f o",
3890
+ "c a",
3891
+ "w ere",
3892
+ "b y",
3893
+ "at e",
3894
+ "p ro",
3895
+ "t ed",
3896
+ "oun d",
3897
+ "ow n",
3898
+ "w ould",
3899
+ "t s",
3900
+ "wh at",
3901
+ "q u",
3902
+ "al ly",
3903
+ "i ght",
3904
+ "c k",
3905
+ "g r",
3906
+ "wh en",
3907
+ "v en",
3908
+ "c an",
3909
+ "ou gh",
3910
+ "in e",
3911
+ "en d",
3912
+ "p er",
3913
+ "ou s",
3914
+ "o d",
3915
+ "id e",
3916
+ "k now",
3917
+ "t y",
3918
+ "ver y",
3919
+ "s i",
3920
+ "a k",
3921
+ "wh o",
3922
+ "ab out",
3923
+ "i ll",
3924
+ "the m",
3925
+ "es t",
3926
+ "re d",
3927
+ "y e",
3928
+ "c ould",
3929
+ "on g",
3930
+ "you r",
3931
+ "the ir",
3932
+ "e m",
3933
+ "j ust",
3934
+ "o ther",
3935
+ "in to",
3936
+ "an y",
3937
+ "wh i",
3938
+ "u m",
3939
+ "t w",
3940
+ "as t",
3941
+ "d er",
3942
+ "d id",
3943
+ "i e",
3944
+ "be en",
3945
+ "ac e",
3946
+ "in k",
3947
+ "it y",
3948
+ "b ack",
3949
+ "t ing",
3950
+ "b r",
3951
+ "mo re",
3952
+ "a ke",
3953
+ "p p",
3954
+ "the n",
3955
+ "s p",
3956
+ "e l",
3957
+ "u se",
3958
+ "b l",
3959
+ "sa id",
3960
+ "o ver",
3961
+ "ge t",
3962
+ "ɑ ː",
3963
+ "i ː",
3964
+ "u ː",
3965
+ "ɜ ː",
3966
+ "ɔ ː",
3967
+ "o ː",
3968
+ "e ɪ",
3969
+ "o ʊ",
3970
+ "a ɪ",
3971
+ "a ʊ",
3972
+ "ɔ ɪ",
3973
+ "d ʒ",
3974
+ "t ʃ",
3975
+ "ɪ ŋ",
3976
+ "ᵻ d",
3977
+ "ˈ iː",
3978
+ "ˌ iː",
3979
+ "ˈ ɪ",
3980
+ "ˌ ɪ",
3981
+ "ˈ eɪ",
3982
+ "ˌ eɪ",
3983
+ "ˈ ɛ",
3984
+ "ˌ ɛ",
3985
+ "ˈ æ",
3986
+ "ˌ æ",
3987
+ "ˈ ɑː",
3988
+ "ˌ ɑː",
3989
+ "ˈ ɔː",
3990
+ "ˌ ɔː",
3991
+ "oː ɹ",
3992
+ "ˈ oːɹ",
3993
+ "ˌ oːɹ",
3994
+ "ˈ oʊ",
3995
+ "ˌ oʊ",
3996
+ "ˈ ʊ",
3997
+ "ˌ ʊ",
3998
+ "ˈ uː",
3999
+ "ˌ uː",
4000
+ "ˈ ɜː",
4001
+ "ˌ ɜː",
4002
+ "ˈ ʌ",
4003
+ "ˌ ʌ",
4004
+ "ˈ aɪ",
4005
+ "ˌ aɪ",
4006
+ "ˈ aʊ",
4007
+ "ˌ aʊ",
4008
+ "ˈ ɔɪ",
4009
+ "ˌ ɔɪ",
4010
+ "ˈ ɚ",
4011
+ "ˌ ɐ"
4012
+ ]
4013
+ }
4014
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": true,
4
+ "bos_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "model_input_names": [
7
+ "input_ids",
8
+ "attention_mask"
9
+ ],
10
+ "model_max_length": 131072,
11
+ "tokenizer_class": "PreTrainedTokenizerFast"
12
+ }