#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Dispatch tokenization requests to a named tokenization engine."""
from typing import Callable, Dict, List

from toolbox.tokenization.pyltp_tokenization import pyltp_tokenize

# Engines that support each language. NOTE(review): this mapping is not
# consulted by tokenize(); it appears informational — consider validating
# the (language, engine) pair against it.
language_to_engines: Dict[str, List[str]] = {
    "chinese": ["pyltp"],
}

# Engine name -> tokenizer callable. NOTE(review): "tagger" in the name is a
# misnomer (the values are tokenizers), kept as-is since the name is public.
engine_to_tagger: Dict[str, Callable] = {
    "pyltp": pyltp_tokenize,
}


def tokenize(text: str, language: str, engine: str) -> List[str]:
    """Tokenize ``text`` using the tokenizer registered under ``engine``.

    :param text: raw input string to split into tokens.
    :param language: language tag forwarded to the engine (e.g. "chinese").
    :param engine: key into ``engine_to_tagger`` (e.g. "pyltp").
    :return: list of word tokens produced by the engine.
    :raises ValueError: if ``engine`` is not registered.
    """
    tokenizer = engine_to_tagger.get(engine)
    if tokenizer is None:
        # ValueError, not AssertionError: an unknown engine is bad caller
        # input, not a violated internal invariant.
        raise ValueError(f"engine {engine} not supported.")
    return tokenizer(text, language)


if __name__ == "__main__":
    pass