MiniCPM-o-4_5-MLX-4bit / special_tokens_map.json
andrevp's picture
Add audio encoder (Whisper 24L) + TTS model (Llama 20L) weights
905bf29 verified
{
"additional_special_tokens": [
"<unk>",
"<image>",
"</image>",
"<ref>",
"</ref>",
"<box>",
"</box>",
"<quad>",
"</quad>",
"<point>",
"</point>",
"<slice>",
"</slice>",
"<image_id>",
"</image_id>",
"<unit>",
"</unit>",
"<answer>",
"</answer>",
"<focus>",
"</focus>",
"<line>",
"</line>",
"<perception>",
"</perception>",
"<source_image>",
"</source_image>",
"<image_save_to>",
"</image_save_to>",
"<|audio_start|>",
"<|audio|>",
"<|audio_end|>",
"<|spk_bos|>",
"<|spk|>",
"<|spk_eos|>",
"<|tts_bos|>",
"<|tts_eos|>",
"<|listen|>",
"<|speak|>",
"<|interrupt|>",
"<|vad_start|>",
"<|vad_end|>",
"<|emotion_start|>",
"<|emotion_end|>",
"<|speed_start|>",
"<|speed_end|>",
"<|pitch_start|>",
"<|pitch_end|>",
"<|turn_bos|>",
"<|turn_eos|>",
"<|chunk_eos|>",
"<|chunk_bos|>",
"<|chunk_tts_bos|>",
"<|chunk_tts_eos|>",
"<|tts_pad|>",
"<|timbre_7|>",
"<|timbre_8|>",
"<|timbre_9|>",
"<|timbre_10|>",
"<|timbre_11|>",
"<|timbre_12|>",
"<|timbre_13|>",
"<|timbre_14|>",
"<|timbre_15|>",
"<|timbre_16|>",
"<|timbre_17|>",
"<|timbre_18|>",
"<|timbre_19|>",
"<|timbre_20|>",
"<|timbre_21|>",
"<|timbre_22|>",
"<|timbre_23|>",
"<|timbre_24|>",
"<|timbre_25|>",
"<|timbre_26|>",
"<|timbre_27|>",
"<|timbre_28|>",
"<|timbre_29|>",
"<|timbre_30|>",
"<|timbre_31|>"
],
"bos_token": "<|im_start|>",
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": "<unk>"
}