Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| from functools import lru_cache | |
| import os | |
# Directory expected to contain the LTP model files joined onto it below
# (cws.model, pos.model, ner.model). This is None when the LTP_DATA_DIR
# environment variable is not set.
ltp_data_dir = os.environ.get("LTP_DATA_DIR")
| from pyltp import Postagger, Segmentor, NamedEntityRecognizer | |
@lru_cache(maxsize=1)
def get_pyltp_srl_tagger():
    """Load the pyltp segmentor, POS tagger and named-entity recognizer.

    The three models are built from ``cws.model``, ``pos.model`` and
    ``ner.model`` inside the directory named by the module-level
    ``ltp_data_dir`` (read from the ``LTP_DATA_DIR`` environment variable
    at import time).

    The result is memoized: the model files are loaded on the first
    successful call only, and every later call returns the same three
    objects. A call that raises is not cached, so it is retried next time.

    NOTE(review): despite the "srl" in the name, no semantic-role-labelling
    model is loaded here; only segmentation, POS tagging and NER.

    Returns:
        A ``(segmentor, pos_tagger, recognizer)`` tuple.

    Raises:
        RuntimeError: if ``LTP_DATA_DIR`` was not set when the module was
            imported (``ltp_data_dir`` is ``None``).
    """
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join(None, ...) would raise.
    if ltp_data_dir is None:
        raise RuntimeError(
            "LTP_DATA_DIR environment variable is not set; it must point to "
            "the directory containing cws.model, pos.model and ner.model"
        )

    cws_model_path = os.path.join(ltp_data_dir, "cws.model")
    pos_model_path = os.path.join(ltp_data_dir, "pos.model")
    ner_model_path = os.path.join(ltp_data_dir, "ner.model")

    segmentor = Segmentor(cws_model_path)
    pos_tagger = Postagger(pos_model_path)
    recognizer = NamedEntityRecognizer(ner_model_path)
    return segmentor, pos_tagger, recognizer
def pyltp_ner(text: str, language: str) -> tuple:
    """Segment, POS-tag and NER-tag ``text`` with the pyltp models.

    Args:
        text: Raw text to analyse.
        language: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A 3-tuple ``(words, postags, ner_tags)`` holding, unchanged, the
        results of ``segmentor.segment``, ``pos_tagger.postag`` and
        ``recognizer.recognize``.
        NOTE(review): presumably the three sequences are aligned one entry
        per word; confirm against the pyltp documentation.
    """
    segmentor, pos_tagger, recognizer = get_pyltp_srl_tagger()
    # Each stage consumes the previous stage's output: words -> POS -> NER.
    words = segmentor.segment(text)
    postags = pos_tagger.postag(words)
    ner_tags = recognizer.recognize(words, postags)
    return words, postags, ner_tags
if __name__ == "__main__":
    # No command-line behavior is defined: running this file directly
    # executes only the module-level imports and definitions above.
    pass