HoneyTian's picture
update
5839f86
raw
history blame contribute delete
575 Bytes
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from typing import Callable, Dict, List
from toolbox.tokenization.pyltp_tokenization import pyltp_tokenize
# Maps a supported language to the engine names able to tokenize it.
# NOTE(review): not consumed by `tokenize` in this file — presumably used by
# callers to discover valid engines; verify against call sites.
language_to_engines: Dict[str, List[str]] = {
    "chinese": ["pyltp"]
}
# Dispatch table: engine name -> tokenizer callable invoked as f(text, language).
engine_to_tagger: Dict[str, Callable] = {
    "pyltp": pyltp_tokenize
}
def tokenize(text: str, language: str, engine: str) -> List[str]:
    """Split ``text`` into words with the tokenizer registered for ``engine``.

    :param text: raw input string to tokenize.
    :param language: language hint, forwarded verbatim to the engine.
    :param engine: key into the ``engine_to_tagger`` registry (e.g. ``"pyltp"``).
    :return: list of word tokens produced by the engine.
    :raises AssertionError: if ``engine`` has no registered tokenizer.
    """
    tagger = engine_to_tagger.get(engine)
    if tagger is None:
        raise AssertionError(f"engine {engine} not supported.")
    return tagger(text, language)
if __name__ == "__main__":
    # No standalone CLI behavior yet; module is intended to be imported.
    pass