File size: 5,980 Bytes
4e9e0e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "effective_vocab_size": 114822,
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": [
    "<|bos|>",
    "<|eos|>",
    "<|unk|>",
    "<|pad|>",
    "<|system|>",
    "<|user_channel|>",
    "<|assistant_channel|>",
    "<|task:speech_to_text|>",
    "<|task:text_to_speech|>",
    "<|input_audio_start|>",
    "<|input_audio_end|>",
    "<|audio_ref_start|>",
    "<|audio_ref_end|>",
    "<|audio_start|>",
    "<|audio_end|>",
    "<|speech_start|>",
    "<|speech_end|>",
    "<|transcript_start|>",
    "<|transcript_end|>",
    "<|segment_start|>",
    "<|segment_end|>",
    "<|speaker|>",
    "<|start_time|>",
    "<|end_time|>",
    "<|duration|>",
    "<|content|>",
    "<|non_speech_event|>",
    "<|retrieval_result_start|>",
    "<|retrieval_result_end|>",
    "<|ocr_start|>",
    "<|ocr_end|>",
    "<|image_start|>",
    "<|image_end|>",
    "<|video_start|>",
    "<|video_end|>",
    "<|user|>",
    "<|assistant|>",
    "<|tool_call|>",
    "<|tool_response|>",
    "<|endoftext|>",
    "<think>",
    "</think>",
    "<|no_think|>",
    "<|think|>",
    "<|think_max|>",
    "<|task:text_to_text|>",
    "<|task:speech_to_speech|>",
    "<|task:text_speech_to_text|>",
    "<|task:text_speech_to_speech|>",
    "<|task:full_duplex_speech|>",
    "<|task:agent|>",
    "<|task:tool_use|>",
    "<|task:rag|>",
    "<|task:code_execution|>",
    "<|task:document_qa|>",
    "<|task:data_analysis|>",
    "<|task:workflow|>",
    "<|reasoning_mode:none|>",
    "<|reasoning_mode:short|>",
    "<|reasoning_mode:deep|>",
    "<|reasoning_mode:verify|>",
    "<|private_reasoning_start|>",
    "<|private_reasoning_end|>",
    "<|reasoning_summary_start|>",
    "<|reasoning_summary_end|>",
    "<|plan_start|>",
    "<|plan_end|>",
    "<|step_start|>",
    "<|step_end|>",
    "<|action_start|>",
    "<|action_end|>",
    "<|observation_start|>",
    "<|observation_end|>",
    "<|reflection_start|>",
    "<|reflection_end|>",
    "<|verification_start|>",
    "<|verification_end|>",
    "<|tool_schema_start|>",
    "<|tool_schema_end|>",
    "<|tool_call_start|>",
    "<|tool_call_end|>",
    "<|tool_result_start|>",
    "<|tool_result_end|>",
    "<|tool_error_start|>",
    "<|tool_error_end|>",
    "<|retrieval_query_start|>",
    "<|retrieval_query_end|>",
    "<|citation_start|>",
    "<|citation_end|>",
    "<|memory_read_start|>",
    "<|memory_read_end|>",
    "<|memory_write_start|>",
    "<|memory_write_end|>",
    "<|final_answer_start|>",
    "<|final_answer_end|>",
    "<|json_start|>",
    "<|json_end|>",
    "<|code_start|>",
    "<|code_end|>",
    "<|markdown_start|>",
    "<|markdown_end|>",
    "<|duplex_start|>",
    "<|duplex_end|>",
    "<|system_channel|>",
    "<|listen|>",
    "<|speak|>",
    "<|listen_speak|>",
    "<|output_audio_start|>",
    "<|output_audio_end|>",
    "<|text_start|>",
    "<|text_end|>",
    "<|overlap|>",
    "<|barge_in|>",
    "<|interruption|>",
    "<|interruption_repair|>",
    "<|backchannel|>",
    "<|turn_yield|>",
    "<|hold|>",
    "<|silence|>",
    "<|non_speech|>",
    "<|voice_reference_start|>",
    "<|voice_reference_end|>",
    "<|voice_reference|>",
    "<|voice_switch|>",
    "<|speaker_style|>",
    "<|prosody_control|>",
    "<|zh_tw|>",
    "<|zh_hant|>",
    "<|taigi|>",
    "<|hakka|>",
    "<|bopomofo|>",
    "<|mixed_en|>",
    "<|en|>",
    "<|ja|>",
    "<|ko|>",
    "<|vi|>",
    "<|id|>",
    "<|th|>",
    "<|asr|>",
    "<|tts|>",
    "<|speaker_0|>",
    "<|speaker_1|>",
    "<|speaker_2|>",
    "<|speaker_3|>",
    "<|timestamp|>",
    "<|noise|>",
    "<|laugh|>",
    "<|breath|>",
    "<|pause|>",
    "<|prosody|>",
    "<|pron|>",
    "</|pron|>",
    "<|image|>",
    "<|ocr|>",
    "<|bbox|>",
    "<|line|>",
    "<|table|>",
    "<|row|>",
    "<|col|>",
    "<|cell|>",
    "<|reading_order|>",
    "<|source|>",
    "<|cite|>",
    "<|evidence|>",
    "<|quote|>"
  ],
  "fix_mistral_regex": true,
  "is_local": true,
  "local_files_only": false,
  "model_max_length": 131072,
  "model_type": "byte_level_bpe",
  "no_audio_codec_tokens": true,
  "no_dense_timestamp_tokens": true,
  "open_formosa": {
    "required_special_token_count": 157,
    "required_special_tokens_present": true,
    "required_special_tokens_single_id": true,
    "standard_special_tokens": {
      "bos_token": "<s>",
      "eos_token": "</s>",
      "pad_token": "<pad>",
      "unk_token": "<unk>"
    }
  },
  "pad_token": "<pad>",
  "padding_side": "right",
  "rich_transcription": {
    "allow_non_speech_events": true,
    "compact_json": true,
    "default_format": "json_segments",
    "enabled": true,
    "include_content": true,
    "include_speaker": true,
    "include_start_end": true,
    "no_dense_timestamp_tokens": true,
    "timestamp_precision_digits": 2,
    "timestamp_unit": "seconds"
  },
  "special_tokens": [
    "<|pad|>",
    "<|bos|>",
    "<|eos|>",
    "<|unk|>",
    "<|system|>",
    "<|user_channel|>",
    "<|assistant_channel|>",
    "<|task:speech_to_text|>",
    "<|task:text_to_speech|>",
    "<|input_audio_start|>",
    "<|input_audio_end|>",
    "<|audio_ref_start|>",
    "<|audio_ref_end|>",
    "<|audio_start|>",
    "<|audio_end|>",
    "<|speech_start|>",
    "<|speech_end|>",
    "<|transcript_start|>",
    "<|transcript_end|>",
    "<|segment_start|>",
    "<|segment_end|>",
    "<|speaker|>",
    "<|start_time|>",
    "<|end_time|>",
    "<|duration|>",
    "<|content|>",
    "<|non_speech_event|>",
    "<|retrieval_result_start|>",
    "<|retrieval_result_end|>",
    "<|ocr_start|>",
    "<|ocr_end|>",
    "<|image_start|>",
    "<|image_end|>",
    "<|video_start|>",
    "<|video_end|>"
  ],
  "strict_no_dense_timestamp_tokens": true,
  "tokenizer_class": "GPT2Tokenizer",
  "truncation_side": "right",
  "unk_token": "<unk>",
  "vocab_size": 114688
}