Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| from typing import Callable, Dict, List | |
| from toolbox.tokenization.pyltp_tokenization import pyltp_tokenize | |
# Maps a language name to the names of the tokenization engines that can
# handle it. The engine names are the keys used by ``engine_to_tagger``.
language_to_engines: Dict[str, List[str]] = {
    "chinese": ["pyltp"],
}
# Registry of tokenizer callables keyed by engine name. ``tokenize`` looks
# the engine up here and calls the result as ``callable(text, language)``.
engine_to_tagger: Dict[str, Callable] = {
    "pyltp": pyltp_tokenize,
}
def tokenize(text: str, language: str, engine: str) -> List[str]:
    """Tokenize *text* with the tokenizer registered under *engine*.

    Args:
        text: The text to split into words.
        language: Language identifier, passed through unchanged to the
            selected tokenizer. It is not validated in this function.
        engine: Key into ``engine_to_tagger`` that selects the tokenizer.

    Returns:
        The value returned by the selected tokenizer when called as
        ``tokenizer(text, language)``.

    Raises:
        AssertionError: If *engine* has no entry in ``engine_to_tagger``.
    """
    backend = engine_to_tagger.get(engine)
    if backend is not None:
        return backend(text, language)
    raise AssertionError(f"engine {engine} not supported.")
if __name__ == "__main__":
    # Intentionally a no-op: running this module as a script does nothing.
    pass