File size: 2,764 Bytes
e778824
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
import time

from project_settings import project_path

# Point NLTK_DATA at the project's bundled nltk data directory. This is done
# before the `nltk` import below; presumably so nltk sees the variable when it
# builds its data search path -- NOTE(review): confirm against nltk.data.
os.environ['NLTK_DATA'] = (project_path / "data/nltk_data").as_posix()

from nltk import DependencyGraph
from pyltp import Segmentor
from pyltp import Parser
from pyltp import Postagger
from pyltp import SementicRoleLabeller


def get_args():
    """Parse the command-line options of this script.

    Options:
        --text: the sentence to analyse; defaults to a built-in Chinese
            sample sentence.
        --ltp_data_dir: directory containing the LTP model files; defaults
            to ``data/pyltp_models/ltp_data_v3.4.0`` under ``project_path``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        the ``text`` and ``ltp_data_dir`` attributes.
    """
    default_model_dir = project_path / "data/pyltp_models/ltp_data_v3.4.0"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--text", type=str, default="元芳你怎么看?我就趴窗口上看呗!")
    arg_parser.add_argument("--ltp_data_dir", type=str, default=default_model_dir.as_posix())
    return arg_parser.parse_args()


def _format_conll(words, postags, arcs):
    """Render parser output as the tab-separated text given to DependencyGraph.

    Each token becomes one line with four fields: ``word(relation/postag)``,
    the POS tag, the head index and the relation label. ``arcs`` items are
    indexed as ``arc[0]`` (head) and ``arc[1]`` (relation).

    Returns:
        The concatenated lines, or an empty string when there are no tokens.
    """
    rows = []
    for word, postag, arc in zip(words, postags, arcs):
        head = arc[0]
        relation = arc[1]
        if head == 0:
            # A head index of 0 marks the sentence root; relabel it "ROOT",
            # presumably to match nltk's default top relation label.
            relation = "ROOT"

        rows.append(
            "\t{word}({relation}/{postag})\t{postag}\t{head}\t{relation}\n".format(
                word=word,
                relation=relation,
                postag=postag,
                head=head,
            )
        )
    return "".join(rows)


def main():
    """
    Dependency-parse ``--text`` with LTP and draw the resulting syntax tree.

    Steps: segment the text into words, POS-tag them, parse the dependency
    arcs (printing the words, the tags and the elapsed time), then convert
    the arcs into the text format accepted by nltk's ``DependencyGraph`` and
    open a window drawing the tree.

    Every model that was loaded is released on exit, including when a later
    step raises.

    Reference:
    https://www.freesion.com/article/91401299576/
    """
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')

    # Only the three models needed for dependency parsing are loaded.
    # Each one is recorded as soon as it is constructed so that the
    # `finally` clause releases exactly the models that exist.
    loaded_models = []
    try:
        segmentor = Segmentor(cws_model_path)
        loaded_models.append(segmentor)
        pos_tagger = Postagger(pos_model_path)
        loaded_models.append(pos_tagger)
        parser = Parser(parser_model_path)
        loaded_models.append(parser)

        # The timing covers inference only, not model loading.
        time_begin = time.time()

        words = segmentor.segment(args.text)
        print(list(words))

        postags = pos_tagger.postag(words)
        print(list(postags))

        arcs = parser.parse(words, postags)

        cost = time.time() - time_begin
        print("cost: {}".format(cost))

        tree_str = _format_conll(words, postags, arcs)
        print(tree_str)

        if not tree_str:
            # No tokens were produced (e.g. empty --text): there is no tree
            # to build or draw.
            return

        conlltree = DependencyGraph(tree_str=tree_str)
        tree = conlltree.tree()
        tree.draw()
    finally:
        for model in loaded_models:
            model.release()
    return


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()