| { |
| "add_prefix_space": false, |
| "backend": "tokenizers", |
| "bos_token": "<s>", |
| "clean_up_tokenization_spaces": false, |
| "effective_vocab_size": 114822, |
| "eos_token": "</s>", |
| "errors": "replace", |
| "extra_special_tokens": [ |
| "<|bos|>", |
| "<|eos|>", |
| "<|unk|>", |
| "<|pad|>", |
| "<|system|>", |
| "<|user_channel|>", |
| "<|assistant_channel|>", |
| "<|task:speech_to_text|>", |
| "<|task:text_to_speech|>", |
| "<|input_audio_start|>", |
| "<|input_audio_end|>", |
| "<|audio_ref_start|>", |
| "<|audio_ref_end|>", |
| "<|audio_start|>", |
| "<|audio_end|>", |
| "<|speech_start|>", |
| "<|speech_end|>", |
| "<|transcript_start|>", |
| "<|transcript_end|>", |
| "<|segment_start|>", |
| "<|segment_end|>", |
| "<|speaker|>", |
| "<|start_time|>", |
| "<|end_time|>", |
| "<|duration|>", |
| "<|content|>", |
| "<|non_speech_event|>", |
| "<|retrieval_result_start|>", |
| "<|retrieval_result_end|>", |
| "<|ocr_start|>", |
| "<|ocr_end|>", |
| "<|image_start|>", |
| "<|image_end|>", |
| "<|video_start|>", |
| "<|video_end|>", |
| "<|user|>", |
| "<|assistant|>", |
| "<|tool_call|>", |
| "<|tool_response|>", |
| "<|endoftext|>", |
| "<think>", |
| "</think>", |
| "<|no_think|>", |
| "<|think|>", |
| "<|think_max|>", |
| "<|task:text_to_text|>", |
| "<|task:speech_to_speech|>", |
| "<|task:text_speech_to_text|>", |
| "<|task:text_speech_to_speech|>", |
| "<|task:full_duplex_speech|>", |
| "<|task:agent|>", |
| "<|task:tool_use|>", |
| "<|task:rag|>", |
| "<|task:code_execution|>", |
| "<|task:document_qa|>", |
| "<|task:data_analysis|>", |
| "<|task:workflow|>", |
| "<|reasoning_mode:none|>", |
| "<|reasoning_mode:short|>", |
| "<|reasoning_mode:deep|>", |
| "<|reasoning_mode:verify|>", |
| "<|private_reasoning_start|>", |
| "<|private_reasoning_end|>", |
| "<|reasoning_summary_start|>", |
| "<|reasoning_summary_end|>", |
| "<|plan_start|>", |
| "<|plan_end|>", |
| "<|step_start|>", |
| "<|step_end|>", |
| "<|action_start|>", |
| "<|action_end|>", |
| "<|observation_start|>", |
| "<|observation_end|>", |
| "<|reflection_start|>", |
| "<|reflection_end|>", |
| "<|verification_start|>", |
| "<|verification_end|>", |
| "<|tool_schema_start|>", |
| "<|tool_schema_end|>", |
| "<|tool_call_start|>", |
| "<|tool_call_end|>", |
| "<|tool_result_start|>", |
| "<|tool_result_end|>", |
| "<|tool_error_start|>", |
| "<|tool_error_end|>", |
| "<|retrieval_query_start|>", |
| "<|retrieval_query_end|>", |
| "<|citation_start|>", |
| "<|citation_end|>", |
| "<|memory_read_start|>", |
| "<|memory_read_end|>", |
| "<|memory_write_start|>", |
| "<|memory_write_end|>", |
| "<|final_answer_start|>", |
| "<|final_answer_end|>", |
| "<|json_start|>", |
| "<|json_end|>", |
| "<|code_start|>", |
| "<|code_end|>", |
| "<|markdown_start|>", |
| "<|markdown_end|>", |
| "<|duplex_start|>", |
| "<|duplex_end|>", |
| "<|system_channel|>", |
| "<|listen|>", |
| "<|speak|>", |
| "<|listen_speak|>", |
| "<|output_audio_start|>", |
| "<|output_audio_end|>", |
| "<|text_start|>", |
| "<|text_end|>", |
| "<|overlap|>", |
| "<|barge_in|>", |
| "<|interruption|>", |
| "<|interruption_repair|>", |
| "<|backchannel|>", |
| "<|turn_yield|>", |
| "<|hold|>", |
| "<|silence|>", |
| "<|non_speech|>", |
| "<|voice_reference_start|>", |
| "<|voice_reference_end|>", |
| "<|voice_reference|>", |
| "<|voice_switch|>", |
| "<|speaker_style|>", |
| "<|prosody_control|>", |
| "<|zh_tw|>", |
| "<|zh_hant|>", |
| "<|taigi|>", |
| "<|hakka|>", |
| "<|bopomofo|>", |
| "<|mixed_en|>", |
| "<|en|>", |
| "<|ja|>", |
| "<|ko|>", |
| "<|vi|>", |
| "<|id|>", |
| "<|th|>", |
| "<|asr|>", |
| "<|tts|>", |
| "<|speaker_0|>", |
| "<|speaker_1|>", |
| "<|speaker_2|>", |
| "<|speaker_3|>", |
| "<|timestamp|>", |
| "<|noise|>", |
| "<|laugh|>", |
| "<|breath|>", |
| "<|pause|>", |
| "<|prosody|>", |
| "<|pron|>", |
| "</|pron|>", |
| "<|image|>", |
| "<|ocr|>", |
| "<|bbox|>", |
| "<|line|>", |
| "<|table|>", |
| "<|row|>", |
| "<|col|>", |
| "<|cell|>", |
| "<|reading_order|>", |
| "<|source|>", |
| "<|cite|>", |
| "<|evidence|>", |
| "<|quote|>" |
| ], |
| "fix_mistral_regex": true, |
| "is_local": true, |
| "local_files_only": false, |
| "model_max_length": 131072, |
| "model_type": "byte_level_bpe", |
| "no_audio_codec_tokens": true, |
| "no_dense_timestamp_tokens": true, |
| "open_formosa": { |
| "required_special_token_count": 157, |
| "required_special_tokens_present": true, |
| "required_special_tokens_single_id": true, |
| "standard_special_tokens": { |
| "bos_token": "<s>", |
| "eos_token": "</s>", |
| "pad_token": "<pad>", |
| "unk_token": "<unk>" |
| } |
| }, |
| "pad_token": "<pad>", |
| "padding_side": "right", |
| "rich_transcription": { |
| "allow_non_speech_events": true, |
| "compact_json": true, |
| "default_format": "json_segments", |
| "enabled": true, |
| "include_content": true, |
| "include_speaker": true, |
| "include_start_end": true, |
| "no_dense_timestamp_tokens": true, |
| "timestamp_precision_digits": 2, |
| "timestamp_unit": "seconds" |
| }, |
| "special_tokens": [ |
| "<|pad|>", |
| "<|bos|>", |
| "<|eos|>", |
| "<|unk|>", |
| "<|system|>", |
| "<|user_channel|>", |
| "<|assistant_channel|>", |
| "<|task:speech_to_text|>", |
| "<|task:text_to_speech|>", |
| "<|input_audio_start|>", |
| "<|input_audio_end|>", |
| "<|audio_ref_start|>", |
| "<|audio_ref_end|>", |
| "<|audio_start|>", |
| "<|audio_end|>", |
| "<|speech_start|>", |
| "<|speech_end|>", |
| "<|transcript_start|>", |
| "<|transcript_end|>", |
| "<|segment_start|>", |
| "<|segment_end|>", |
| "<|speaker|>", |
| "<|start_time|>", |
| "<|end_time|>", |
| "<|duration|>", |
| "<|content|>", |
| "<|non_speech_event|>", |
| "<|retrieval_result_start|>", |
| "<|retrieval_result_end|>", |
| "<|ocr_start|>", |
| "<|ocr_end|>", |
| "<|image_start|>", |
| "<|image_end|>", |
| "<|video_start|>", |
| "<|video_end|>" |
| ], |
| "strict_no_dense_timestamp_tokens": true, |
| "tokenizer_class": "GPT2Tokenizer", |
| "truncation_side": "right", |
| "unk_token": "<unk>", |
| "vocab_size": 114688 |
| } |
|
|