Instructions to use OpenFormosa/PangolinTokenizer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OpenFormosa/PangolinTokenizer with Transformers:
```python
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("OpenFormosa/PangolinTokenizer", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "<|pad|>": 114653, | |
| "<|bos|>": 114654, | |
| "<|eos|>": 114655, | |
| "<|unk|>": 114656, | |
| "<|system|>": 114657, | |
| "<|user_channel|>": 114658, | |
| "<|assistant_channel|>": 114659, | |
| "<|task:speech_to_text|>": 114660, | |
| "<|task:text_to_speech|>": 114661, | |
| "<|input_audio_start|>": 114662, | |
| "<|input_audio_end|>": 114663, | |
| "<|audio_ref_start|>": 114664, | |
| "<|audio_ref_end|>": 114665, | |
| "<|audio_start|>": 114666, | |
| "<|audio_end|>": 114667, | |
| "<|speech_start|>": 114668, | |
| "<|speech_end|>": 114669, | |
| "<|transcript_start|>": 114670, | |
| "<|transcript_end|>": 114671, | |
| "<|segment_start|>": 114672, | |
| "<|segment_end|>": 114673, | |
| "<|speaker|>": 114674, | |
| "<|start_time|>": 114675, | |
| "<|end_time|>": 114676, | |
| "<|duration|>": 114677, | |
| "<|content|>": 114678, | |
| "<|non_speech_event|>": 114679, | |
| "<|retrieval_result_start|>": 114680, | |
| "<|retrieval_result_end|>": 114681, | |
| "<|ocr_start|>": 114682, | |
| "<|ocr_end|>": 114683, | |
| "<|image_start|>": 114684, | |
| "<|image_end|>": 114685, | |
| "<|video_start|>": 114686, | |
| "<|video_end|>": 114687, | |
| "<unk>": 114688, | |
| "<s>": 114689, | |
| "</s>": 114690, | |
| "<pad>": 114691, | |
| "<|user|>": 114692, | |
| "<|assistant|>": 114693, | |
| "<|tool_call|>": 114694, | |
| "<|tool_response|>": 114695, | |
| "<|endoftext|>": 114696, | |
| "<think>": 114697, | |
| "</think>": 114698, | |
| "<|no_think|>": 114699, | |
| "<|think|>": 114700, | |
| "<|think_max|>": 114701, | |
| "<|task:text_to_text|>": 114702, | |
| "<|task:speech_to_speech|>": 114703, | |
| "<|task:text_speech_to_text|>": 114704, | |
| "<|task:text_speech_to_speech|>": 114705, | |
| "<|task:full_duplex_speech|>": 114706, | |
| "<|task:agent|>": 114707, | |
| "<|task:tool_use|>": 114708, | |
| "<|task:rag|>": 114709, | |
| "<|task:code_execution|>": 114710, | |
| "<|task:document_qa|>": 114711, | |
| "<|task:data_analysis|>": 114712, | |
| "<|task:workflow|>": 114713, | |
| "<|reasoning_mode:none|>": 114714, | |
| "<|reasoning_mode:short|>": 114715, | |
| "<|reasoning_mode:deep|>": 114716, | |
| "<|reasoning_mode:verify|>": 114717, | |
| "<|private_reasoning_start|>": 114718, | |
| "<|private_reasoning_end|>": 114719, | |
| "<|reasoning_summary_start|>": 114720, | |
| "<|reasoning_summary_end|>": 114721, | |
| "<|plan_start|>": 114722, | |
| "<|plan_end|>": 114723, | |
| "<|step_start|>": 114724, | |
| "<|step_end|>": 114725, | |
| "<|action_start|>": 114726, | |
| "<|action_end|>": 114727, | |
| "<|observation_start|>": 114728, | |
| "<|observation_end|>": 114729, | |
| "<|reflection_start|>": 114730, | |
| "<|reflection_end|>": 114731, | |
| "<|verification_start|>": 114732, | |
| "<|verification_end|>": 114733, | |
| "<|tool_schema_start|>": 114734, | |
| "<|tool_schema_end|>": 114735, | |
| "<|tool_call_start|>": 114736, | |
| "<|tool_call_end|>": 114737, | |
| "<|tool_result_start|>": 114738, | |
| "<|tool_result_end|>": 114739, | |
| "<|tool_error_start|>": 114740, | |
| "<|tool_error_end|>": 114741, | |
| "<|retrieval_query_start|>": 114742, | |
| "<|retrieval_query_end|>": 114743, | |
| "<|citation_start|>": 114744, | |
| "<|citation_end|>": 114745, | |
| "<|memory_read_start|>": 114746, | |
| "<|memory_read_end|>": 114747, | |
| "<|memory_write_start|>": 114748, | |
| "<|memory_write_end|>": 114749, | |
| "<|final_answer_start|>": 114750, | |
| "<|final_answer_end|>": 114751, | |
| "<|json_start|>": 114752, | |
| "<|json_end|>": 114753, | |
| "<|code_start|>": 114754, | |
| "<|code_end|>": 114755, | |
| "<|markdown_start|>": 114756, | |
| "<|markdown_end|>": 114757, | |
| "<|duplex_start|>": 114758, | |
| "<|duplex_end|>": 114759, | |
| "<|system_channel|>": 114760, | |
| "<|listen|>": 114761, | |
| "<|speak|>": 114762, | |
| "<|listen_speak|>": 114763, | |
| "<|output_audio_start|>": 114764, | |
| "<|output_audio_end|>": 114765, | |
| "<|text_start|>": 114766, | |
| "<|text_end|>": 114767, | |
| "<|overlap|>": 114768, | |
| "<|barge_in|>": 114769, | |
| "<|interruption|>": 114770, | |
| "<|interruption_repair|>": 114771, | |
| "<|backchannel|>": 114772, | |
| "<|turn_yield|>": 114773, | |
| "<|hold|>": 114774, | |
| "<|silence|>": 114775, | |
| "<|non_speech|>": 114776, | |
| "<|voice_reference_start|>": 114777, | |
| "<|voice_reference_end|>": 114778, | |
| "<|voice_reference|>": 114779, | |
| "<|voice_switch|>": 114780, | |
| "<|speaker_style|>": 114781, | |
| "<|prosody_control|>": 114782, | |
| "<|zh_tw|>": 114783, | |
| "<|zh_hant|>": 114784, | |
| "<|taigi|>": 114785, | |
| "<|hakka|>": 114786, | |
| "<|bopomofo|>": 114787, | |
| "<|mixed_en|>": 114788, | |
| "<|en|>": 114789, | |
| "<|ja|>": 114790, | |
| "<|ko|>": 114791, | |
| "<|vi|>": 114792, | |
| "<|id|>": 114793, | |
| "<|th|>": 114794, | |
| "<|asr|>": 114795, | |
| "<|tts|>": 114796, | |
| "<|speaker_0|>": 114797, | |
| "<|speaker_1|>": 114798, | |
| "<|speaker_2|>": 114799, | |
| "<|speaker_3|>": 114800, | |
| "<|timestamp|>": 114801, | |
| "<|noise|>": 114802, | |
| "<|laugh|>": 114803, | |
| "<|breath|>": 114804, | |
| "<|pause|>": 114805, | |
| "<|prosody|>": 114806, | |
| "<|pron|>": 114807, | |
| "</|pron|>": 114808, | |
| "<|image|>": 114809, | |
| "<|ocr|>": 114810, | |
| "<|bbox|>": 114811, | |
| "<|line|>": 114812, | |
| "<|table|>": 114813, | |
| "<|row|>": 114814, | |
| "<|col|>": 114815, | |
| "<|cell|>": 114816, | |
| "<|reading_order|>": 114817, | |
| "<|source|>": 114818, | |
| "<|cite|>": 114819, | |
| "<|evidence|>": 114820, | |
| "<|quote|>": 114821 | |
| } | |