Instructions to use OpenFormosa/PangolinTokenizer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OpenFormosa/PangolinTokenizer with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("OpenFormosa/PangolinTokenizer", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "unk_token": "<unk>", | |
| "bos_token": "<s>", | |
| "eos_token": "</s>", | |
| "pad_token": "<pad>", | |
| "additional_special_tokens": [ | |
| "<|bos|>", | |
| "<|eos|>", | |
| "<|unk|>", | |
| "<|pad|>", | |
| "<|system|>", | |
| "<|user_channel|>", | |
| "<|assistant_channel|>", | |
| "<|task:speech_to_text|>", | |
| "<|task:text_to_speech|>", | |
| "<|input_audio_start|>", | |
| "<|input_audio_end|>", | |
| "<|audio_ref_start|>", | |
| "<|audio_ref_end|>", | |
| "<|audio_start|>", | |
| "<|audio_end|>", | |
| "<|speech_start|>", | |
| "<|speech_end|>", | |
| "<|transcript_start|>", | |
| "<|transcript_end|>", | |
| "<|segment_start|>", | |
| "<|segment_end|>", | |
| "<|speaker|>", | |
| "<|start_time|>", | |
| "<|end_time|>", | |
| "<|duration|>", | |
| "<|content|>", | |
| "<|non_speech_event|>", | |
| "<|retrieval_result_start|>", | |
| "<|retrieval_result_end|>", | |
| "<|ocr_start|>", | |
| "<|ocr_end|>", | |
| "<|image_start|>", | |
| "<|image_end|>", | |
| "<|video_start|>", | |
| "<|video_end|>", | |
| "<|user|>", | |
| "<|assistant|>", | |
| "<|tool_call|>", | |
| "<|tool_response|>", | |
| "<|endoftext|>", | |
| "<think>", | |
| "</think>", | |
| "<|no_think|>", | |
| "<|think|>", | |
| "<|think_max|>", | |
| "<|task:text_to_text|>", | |
| "<|task:speech_to_speech|>", | |
| "<|task:text_speech_to_text|>", | |
| "<|task:text_speech_to_speech|>", | |
| "<|task:full_duplex_speech|>", | |
| "<|task:agent|>", | |
| "<|task:tool_use|>", | |
| "<|task:rag|>", | |
| "<|task:code_execution|>", | |
| "<|task:document_qa|>", | |
| "<|task:data_analysis|>", | |
| "<|task:workflow|>", | |
| "<|reasoning_mode:none|>", | |
| "<|reasoning_mode:short|>", | |
| "<|reasoning_mode:deep|>", | |
| "<|reasoning_mode:verify|>", | |
| "<|private_reasoning_start|>", | |
| "<|private_reasoning_end|>", | |
| "<|reasoning_summary_start|>", | |
| "<|reasoning_summary_end|>", | |
| "<|plan_start|>", | |
| "<|plan_end|>", | |
| "<|step_start|>", | |
| "<|step_end|>", | |
| "<|action_start|>", | |
| "<|action_end|>", | |
| "<|observation_start|>", | |
| "<|observation_end|>", | |
| "<|reflection_start|>", | |
| "<|reflection_end|>", | |
| "<|verification_start|>", | |
| "<|verification_end|>", | |
| "<|tool_schema_start|>", | |
| "<|tool_schema_end|>", | |
| "<|tool_call_start|>", | |
| "<|tool_call_end|>", | |
| "<|tool_result_start|>", | |
| "<|tool_result_end|>", | |
| "<|tool_error_start|>", | |
| "<|tool_error_end|>", | |
| "<|retrieval_query_start|>", | |
| "<|retrieval_query_end|>", | |
| "<|citation_start|>", | |
| "<|citation_end|>", | |
| "<|memory_read_start|>", | |
| "<|memory_read_end|>", | |
| "<|memory_write_start|>", | |
| "<|memory_write_end|>", | |
| "<|final_answer_start|>", | |
| "<|final_answer_end|>", | |
| "<|json_start|>", | |
| "<|json_end|>", | |
| "<|code_start|>", | |
| "<|code_end|>", | |
| "<|markdown_start|>", | |
| "<|markdown_end|>", | |
| "<|duplex_start|>", | |
| "<|duplex_end|>", | |
| "<|system_channel|>", | |
| "<|listen|>", | |
| "<|speak|>", | |
| "<|listen_speak|>", | |
| "<|output_audio_start|>", | |
| "<|output_audio_end|>", | |
| "<|text_start|>", | |
| "<|text_end|>", | |
| "<|overlap|>", | |
| "<|barge_in|>", | |
| "<|interruption|>", | |
| "<|interruption_repair|>", | |
| "<|backchannel|>", | |
| "<|turn_yield|>", | |
| "<|hold|>", | |
| "<|silence|>", | |
| "<|non_speech|>", | |
| "<|voice_reference_start|>", | |
| "<|voice_reference_end|>", | |
| "<|voice_reference|>", | |
| "<|voice_switch|>", | |
| "<|speaker_style|>", | |
| "<|prosody_control|>", | |
| "<|zh_tw|>", | |
| "<|zh_hant|>", | |
| "<|taigi|>", | |
| "<|hakka|>", | |
| "<|bopomofo|>", | |
| "<|mixed_en|>", | |
| "<|en|>", | |
| "<|ja|>", | |
| "<|ko|>", | |
| "<|vi|>", | |
| "<|id|>", | |
| "<|th|>", | |
| "<|asr|>", | |
| "<|tts|>", | |
| "<|speaker_0|>", | |
| "<|speaker_1|>", | |
| "<|speaker_2|>", | |
| "<|speaker_3|>", | |
| "<|timestamp|>", | |
| "<|noise|>", | |
| "<|laugh|>", | |
| "<|breath|>", | |
| "<|pause|>", | |
| "<|prosody|>", | |
| "<|pron|>", | |
| "</|pron|>", | |
| "<|image|>", | |
| "<|ocr|>", | |
| "<|bbox|>", | |
| "<|line|>", | |
| "<|table|>", | |
| "<|row|>", | |
| "<|col|>", | |
| "<|cell|>", | |
| "<|reading_order|>", | |
| "<|source|>", | |
| "<|cite|>", | |
| "<|evidence|>", | |
| "<|quote|>" | |
| ] | |
| } | |