Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| import argparse | |
| import os | |
| import time | |
| from project_settings import project_path | |
| os.environ['NLTK_DATA'] = (project_path / "data/nltk_data").as_posix() | |
| from nltk import DependencyGraph | |
| from pyltp import Segmentor | |
| from pyltp import Parser | |
| from pyltp import Postagger | |
| from pyltp import SementicRoleLabeller | |
def get_args():
    """Parse the command-line options of the dependency-parsing demo.

    Options:
        --text: sentence to analyse; defaults to a built-in Chinese sample.
        --ltp_data_dir: directory containing the LTP model files; defaults
            to ``data/pyltp_models/ltp_data_v3.4.0`` under ``project_path``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        the ``text`` and ``ltp_data_dir`` attributes.
    """
    default_model_dir = project_path / "data/pyltp_models/ltp_data_v3.4.0"

    arg_parser = argparse.ArgumentParser()
    # Any other sentence can be supplied on the command line via --text.
    arg_parser.add_argument(
        "--text",
        type=str,
        default="元芳你怎么看?我就趴窗口上看呗!",
    )
    arg_parser.add_argument(
        "--ltp_data_dir",
        type=str,
        default=default_model_dir.as_posix(),
    )
    return arg_parser.parse_args()
def main():
    """Run LTP dependency parsing on ``--text`` and draw the parse tree.

    The text is segmented into words, tagged with part-of-speech labels and
    dependency-parsed with the pyltp models found in ``--ltp_data_dir``.
    The result is printed as tab-separated rows (word, tag, head, relation),
    loaded into an NLTK ``DependencyGraph`` and displayed with ``draw()``.

    Every loaded model is released when the function exits, whether it
    finishes normally or an exception is raised part-way through.

    Reference:
        https://www.freesion.com/article/91401299576/
    """
    # Function-scope import: only this function needs ExitStack.
    from contextlib import ExitStack

    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')
    srl_model_path = os.path.join(args.ltp_data_dir, 'pisrl_win.model')

    with ExitStack() as models:
        # Register each release() right after its model loads so that a
        # failure in a later constructor or in the analysis below still
        # frees everything that was already loaded.
        segmentor = Segmentor(cws_model_path)
        models.callback(segmentor.release)
        pos_tagger = Postagger(pos_model_path)
        models.callback(pos_tagger.release)
        parser = Parser(parser_model_path)
        models.callback(parser.release)
        # The SRL model is loaded as before although nothing below uses it;
        # it is now released as well instead of being leaked.
        srl_labeler = SementicRoleLabeller(srl_model_path)
        models.callback(srl_labeler.release)

        # Time only the analysis pipeline, not the model loading above.
        time_begin = time.time()

        words = segmentor.segment(args.text)
        print(list(words))

        postags = pos_tagger.postag(words)
        print(list(postags))

        arcs = parser.parse(words, postags)

        cost = time.time() - time_begin
        print("cost: {}".format(cost))

        rows = []
        for word, postag, arc in zip(words, postags, arcs):
            head = arc[0]
            relation = arc[1]
            # Head index 0 marks the sentence root; label it "ROOT", which
            # is the relation the script feeds to DependencyGraph for the
            # top node.
            if head == 0:
                relation = "ROOT"
            rows.append(
                "\t{word}({relation}/{postag})\t{postag}\t{head}\t{relation}\n".format(
                    word=word,
                    relation=relation,
                    postag=postag,
                    head=head,
                )
            )
        tree_str = "".join(rows)
        print(tree_str)

        conlltree = DependencyGraph(tree_str=tree_str)
        tree = conlltree.tree()
        tree.draw()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()