Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| from functools import lru_cache | |
| import os | |
| from typing import List | |
# Directory that holds the LTP model files, read once at import time from the
# LTP_DATA_DIR environment variable; None when the variable is not set.
ltp_data_dir = os.getenv("LTP_DATA_DIR")
| from pyltp import Segmentor | |
@lru_cache(maxsize=None)
def _load_segmentor(cws_model_path: str) -> "Segmentor":
    """Construct the pyltp ``Segmentor`` for *cws_model_path*, once per path."""
    # Keyed on the path, so a different model directory yields a different
    # instance; the number of distinct paths is expected to be tiny.
    return Segmentor(cws_model_path)


def get_pyltp_tokenizer():
    """Return a pyltp ``Segmentor`` built from ``cws.model`` in ``ltp_data_dir``.

    The segmentor is created on first use and then reused for every later
    call with the same model path, instead of constructing a new one each
    time. Because the instance is shared, callers should not release it.

    Returns:
        The ``Segmentor`` for ``<ltp_data_dir>/cws.model``.

    Raises:
        TypeError: If ``ltp_data_dir`` is ``None`` (``LTP_DATA_DIR`` was not
            set when the module was imported).
    """
    if ltp_data_dir is None:
        # Same exception type os.path.join would raise for None, but with a
        # message that names the actual cause.
        raise TypeError(
            "LTP_DATA_DIR environment variable is not set; "
            "cannot locate cws.model"
        )
    return _load_segmentor(os.path.join(ltp_data_dir, "cws.model"))
def pyltp_tokenize(text: str, language: str) -> List[str]:
    """Segment *text* into words using the pyltp segmentor.

    Args:
        text: The string passed to ``Segmentor.segment``.
        language: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``Segmentor.segment(text)``, returned unchanged.
    """
    return get_pyltp_tokenizer().segment(text)
# Script entry point: deliberately a no-op, so running this file directly
# does nothing beyond the module-level imports and definitions.
if __name__ == "__main__":
    pass