HoneyTian's picture
update
845e414
raw
history blame contribute delete
959 Bytes
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from functools import lru_cache
import os
# Directory expected to contain the LTP model files; None when the
# LTP_DATA_DIR environment variable is not set.
ltp_data_dir = os.getenv("LTP_DATA_DIR")
from pyltp import Postagger, Segmentor, NamedEntityRecognizer
@lru_cache(maxsize=5)
def get_pyltp_srl_tagger():
    """Build the pyltp segmentor, POS tagger and named-entity recognizer.

    The model files ``cws.model``, ``pos.model`` and ``ner.model`` are looked
    up inside the module-level ``ltp_data_dir``. Despite the function name,
    only these three models are loaded here.

    The function takes no arguments, so ``lru_cache`` returns the same three
    objects on every call after the first successful one; the models are
    therefore constructed at most once per process.

    Returns:
        A ``(segmentor, pos_tagger, recognizer)`` tuple.

    Raises:
        TypeError: if ``ltp_data_dir`` is ``None``, i.e. the ``LTP_DATA_DIR``
            environment variable was not set when the module was imported.
    """
    # ``ltp_data_dir`` is only read here, so no ``global`` statement is needed.
    if ltp_data_dir is None:
        # os.path.join(None, ...) would raise an opaque TypeError; keep the
        # same exception type but tell the user what is actually missing.
        raise TypeError(
            "LTP_DATA_DIR environment variable is not set; it must point to "
            "the directory containing cws.model, pos.model and ner.model"
        )

    cws_model_path = os.path.join(ltp_data_dir, "cws.model")
    pos_model_path = os.path.join(ltp_data_dir, "pos.model")
    ner_model_path = os.path.join(ltp_data_dir, "ner.model")

    segmentor = Segmentor(cws_model_path)
    pos_tagger = Postagger(pos_model_path)
    recognizer = NamedEntityRecognizer(ner_model_path)
    return segmentor, pos_tagger, recognizer
def pyltp_ner(text: str, language: str) -> tuple:
    """Segment *text*, tag parts of speech and recognize named entities.

    The three pyltp components are obtained from ``get_pyltp_srl_tagger()``
    and applied in sequence: the segmentor output feeds the POS tagger, and
    both feed the named-entity recognizer.

    Args:
        text: The raw text to analyse.
        language: Accepted for interface compatibility but not used by this
            function.

    Returns:
        A 3-tuple ``(words, postags, ner_tags)`` holding, in that order, the
        results of the segmentor, the POS tagger and the recognizer.
    """
    segmentor, pos_tagger, recognizer = get_pyltp_srl_tagger()

    words = segmentor.segment(text)
    postags = pos_tagger.postag(words)
    ner_tags = recognizer.recognize(words, postags)
    return words, postags, ner_tags
if __name__ == "__main__":
    # Nothing happens when the file is executed directly; the module is
    # meant to be imported for its functions.
    pass