"""Standalone shim: import Cl100kChatTokenizer without needing Megatron.

The tokenizer class only depends on `tiktoken`; the Megatron abstract base
classes it inherits from are replaced with empty stubs at import time here
for portability.
"""
import sys
import types

# Dotted paths of the Megatron modules that get stubbed. Each path is used
# twice (module creation and registration), so keep it in one place.
_ABSTRACT_MODULE = "megatron.core.tokenizers.text.libraries.abstract_tokenizer"
_CHAT_MODULE = "megatron.core.tokenizers.text.libraries.chat_template"


# Stub the Megatron abstract base classes that the file inherits from.
# We don't need their behaviour at runtime — only the class hierarchy.
class MegatronTokenizerTextAbstract:
    """Empty stand-in for Megatron's abstract text tokenizer base class."""


class MegatronTokenizerChatTemplate:
    """Empty stand-in for Megatron's chat-template base class."""


# Build bare module objects that expose nothing but the stub classes.
_abstract = types.ModuleType(_ABSTRACT_MODULE)
_abstract.MegatronTokenizerTextAbstract = MegatronTokenizerTextAbstract

_chat = types.ModuleType(_CHAT_MODULE)
_chat.MegatronTokenizerChatTemplate = MegatronTokenizerChatTemplate

# setdefault() only registers a stub when nothing is cached under that name
# yet, so modules that were already imported are left untouched.
sys.modules.setdefault(_ABSTRACT_MODULE, _abstract)
sys.modules.setdefault(_CHAT_MODULE, _chat)
# Re-export the tokenizer and its special-token table for users of this shim.
# This import sits below the sys.modules registration on purpose (hence E402);
# the names are imported only to be re-exported (hence F401).
from cl100k_chat_tokenizer import (  # noqa: E402, F401
    CL100K_CHAT_SPECIAL_TOKENS,
    Cl100kChatTokenizer,
)