#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Dependency-parse a sentence with pyltp and draw the tree with NLTK."""
import argparse
import contextlib
import os
import time

from project_settings import project_path

# NLTK_DATA is assigned before nltk is imported; keep this statement order.
os.environ['NLTK_DATA'] = (project_path / "data/nltk_data").as_posix()

from nltk import DependencyGraph
from pyltp import Segmentor
from pyltp import Parser
from pyltp import Postagger
from pyltp import SementicRoleLabeller


def get_args():
    """Parse the command-line options.

    Returns:
        argparse.Namespace with two string attributes:
        ``text`` (the sentence to analyse) and ``ltp_data_dir`` (the
        directory the model files are read from).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        default="元芳你怎么看?我就趴窗口上看呗!",
        # Alternative sample sentences kept for manual experiments:
        # default="集中竞价的方式回购公司股份",
        # default=",全中国都是我的",
        # default="可以啊可以",
        # default="我们是免费办理的, 不会收取任何手续费和服务费, 随借随还, 可以留个备用,您看可以吗?",
        # default="。啊不用不用挂断 你这个昨天来过电话 你哪有打不通",
        # default="利息怎么算",
        type=str
    )
    parser.add_argument(
        "--ltp_data_dir",
        default=(project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix(),
        type=str
    )
    args = parser.parse_args()
    return args


def _build_conll(words, postags, arcs):
    """Render parallel word / POS-tag / arc sequences as tab-separated rows.

    Each row is ``word(relation/postag)  postag  head  relation``. ``arc[0]``
    is used as the head index and ``arc[1]`` as the relation name. A head of
    0 has its relation rewritten to "ROOT" so the row is recognised as the
    top of the tree (DependencyGraph's default top relation label is "ROOT").

    Returns:
        A single string with one newline-terminated row per token; the empty
        string when there are no tokens.
    """
    rows = []
    for word, postag, arc in zip(words, postags, arcs):
        head = arc[0]
        relation = "ROOT" if head == 0 else arc[1]
        rows.append(
            """\t{word}({relation}/{postag})\t{postag}\t{head}\t{relation}\n""".format(
                word=word,
                relation=relation,
                postag=postag,
                head=head,
            )
        )
    return "".join(rows)


def main():
    """Segment, POS-tag and dependency-parse ``--text``, then draw the tree.

    Prints the token list, the POS-tag list, the elapsed time of the three
    analysis steps and the tab-separated parse, then opens the tree drawing.

    Reference:
    https://www.freesion.com/article/91401299576/
    """
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')
    srl_model_path = os.path.join(args.ltp_data_dir, 'pisrl_win.model')

    # Every model registers its release() as soon as it is constructed, so
    # all loaded models are released even when a later step raises.
    with contextlib.ExitStack() as models:
        segmentor = Segmentor(cws_model_path)
        models.callback(segmentor.release)

        pos_tagger = Postagger(pos_model_path)
        models.callback(pos_tagger.release)

        parser = Parser(parser_model_path)
        models.callback(parser.release)

        # NOTE(review): the role labeller is loaded but not used below; the
        # load is kept to preserve the original behaviour, and it is now
        # released like the other models.
        srl_labeler = SementicRoleLabeller(srl_model_path)
        models.callback(srl_labeler.release)

        time_begin = time.time()

        words = segmentor.segment(args.text)
        print(list(words))

        postags = pos_tagger.postag(words)
        print(list(postags))

        arcs = parser.parse(words, postags)

        cost = time.time() - time_begin
        print("cost: {}".format(cost))

        tree_str = _build_conll(words, postags, arcs)
        print(tree_str)

        conlltree = DependencyGraph(tree_str=tree_str)
        tree = conlltree.tree()
        tree.draw()


if __name__ == '__main__':
    main()