{ "add_prefix_space": false, "backend": "tokenizers", "bos_token": "", "clean_up_tokenization_spaces": false, "effective_vocab_size": 114822, "eos_token": "", "errors": "replace", "extra_special_tokens": [ "<|bos|>", "<|eos|>", "<|unk|>", "<|pad|>", "<|system|>", "<|user_channel|>", "<|assistant_channel|>", "<|task:speech_to_text|>", "<|task:text_to_speech|>", "<|input_audio_start|>", "<|input_audio_end|>", "<|audio_ref_start|>", "<|audio_ref_end|>", "<|audio_start|>", "<|audio_end|>", "<|speech_start|>", "<|speech_end|>", "<|transcript_start|>", "<|transcript_end|>", "<|segment_start|>", "<|segment_end|>", "<|speaker|>", "<|start_time|>", "<|end_time|>", "<|duration|>", "<|content|>", "<|non_speech_event|>", "<|retrieval_result_start|>", "<|retrieval_result_end|>", "<|ocr_start|>", "<|ocr_end|>", "<|image_start|>", "<|image_end|>", "<|video_start|>", "<|video_end|>", "<|user|>", "<|assistant|>", "<|tool_call|>", "<|tool_response|>", "<|endoftext|>", "", "", "<|no_think|>", "<|think|>", "<|think_max|>", "<|task:text_to_text|>", "<|task:speech_to_speech|>", "<|task:text_speech_to_text|>", "<|task:text_speech_to_speech|>", "<|task:full_duplex_speech|>", "<|task:agent|>", "<|task:tool_use|>", "<|task:rag|>", "<|task:code_execution|>", "<|task:document_qa|>", "<|task:data_analysis|>", "<|task:workflow|>", "<|reasoning_mode:none|>", "<|reasoning_mode:short|>", "<|reasoning_mode:deep|>", "<|reasoning_mode:verify|>", "<|private_reasoning_start|>", "<|private_reasoning_end|>", "<|reasoning_summary_start|>", "<|reasoning_summary_end|>", "<|plan_start|>", "<|plan_end|>", "<|step_start|>", "<|step_end|>", "<|action_start|>", "<|action_end|>", "<|observation_start|>", "<|observation_end|>", "<|reflection_start|>", "<|reflection_end|>", "<|verification_start|>", "<|verification_end|>", "<|tool_schema_start|>", "<|tool_schema_end|>", "<|tool_call_start|>", "<|tool_call_end|>", "<|tool_result_start|>", "<|tool_result_end|>", "<|tool_error_start|>", "<|tool_error_end|>", "<|retrieval_query_start|>", "<|retrieval_query_end|>", "<|citation_start|>", "<|citation_end|>", "<|memory_read_start|>", "<|memory_read_end|>", "<|memory_write_start|>", "<|memory_write_end|>", "<|final_answer_start|>", "<|final_answer_end|>", "<|json_start|>", "<|json_end|>", "<|code_start|>", "<|code_end|>", "<|markdown_start|>", "<|markdown_end|>", "<|duplex_start|>", "<|duplex_end|>", "<|system_channel|>", "<|listen|>", "<|speak|>", "<|listen_speak|>", "<|output_audio_start|>", "<|output_audio_end|>", "<|text_start|>", "<|text_end|>", "<|overlap|>", "<|barge_in|>", "<|interruption|>", "<|interruption_repair|>", "<|backchannel|>", "<|turn_yield|>", "<|hold|>", "<|silence|>", "<|non_speech|>", "<|voice_reference_start|>", "<|voice_reference_end|>", "<|voice_reference|>", "<|voice_switch|>", "<|speaker_style|>", "<|prosody_control|>", "<|zh_tw|>", "<|zh_hant|>", "<|taigi|>", "<|hakka|>", "<|bopomofo|>", "<|mixed_en|>", "<|en|>", "<|ja|>", "<|ko|>", "<|vi|>", "<|id|>", "<|th|>", "<|asr|>", "<|tts|>", "<|speaker_0|>", "<|speaker_1|>", "<|speaker_2|>", "<|speaker_3|>", "<|timestamp|>", "<|noise|>", "<|laugh|>", "<|breath|>", "<|pause|>", "<|prosody|>", "<|pron|>", "", "<|image|>", "<|ocr|>", "<|bbox|>", "<|line|>", "<|table|>", "<|row|>", "<|col|>", "<|cell|>", "<|reading_order|>", "<|source|>", "<|cite|>", "<|evidence|>", "<|quote|>" ], "fix_mistral_regex": true, "is_local": true, "local_files_only": false, "model_max_length": 131072, "model_type": "byte_level_bpe", "no_audio_codec_tokens": true, "no_dense_timestamp_tokens": true, "open_formosa": { "required_special_token_count": 157, "required_special_tokens_present": true, "required_special_tokens_single_id": true, "standard_special_tokens": { "bos_token": "", "eos_token": "", "pad_token": "", "unk_token": "" } }, "pad_token": "", "padding_side": "right", "rich_transcription": { "allow_non_speech_events": true, "compact_json": true, "default_format": "json_segments", "enabled": true, "include_content": true, "include_speaker": true, "include_start_end": true, "no_dense_timestamp_tokens": true, "timestamp_precision_digits": 2, "timestamp_unit": "seconds" }, "special_tokens": [ "<|pad|>", "<|bos|>", "<|eos|>", "<|unk|>", "<|system|>", "<|user_channel|>", "<|assistant_channel|>", "<|task:speech_to_text|>", "<|task:text_to_speech|>", "<|input_audio_start|>", "<|input_audio_end|>", "<|audio_ref_start|>", "<|audio_ref_end|>", "<|audio_start|>", "<|audio_end|>", "<|speech_start|>", "<|speech_end|>", "<|transcript_start|>", "<|transcript_end|>", "<|segment_start|>", "<|segment_end|>", "<|speaker|>", "<|start_time|>", "<|end_time|>", "<|duration|>", "<|content|>", "<|non_speech_event|>", "<|retrieval_result_start|>", "<|retrieval_result_end|>", "<|ocr_start|>", "<|ocr_end|>", "<|image_start|>", "<|image_end|>", "<|video_start|>", "<|video_end|>" ], "strict_no_dense_timestamp_tokens": true, "tokenizer_class": "GPT2Tokenizer", "truncation_side": "right", "unk_token": "", "vocab_size": 114688 }