Spaces:
Paused
Paused
File size: 2,764 Bytes
e778824 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
import time
from project_settings import project_path
os.environ['NLTK_DATA'] = (project_path / "data/nltk_data").as_posix()
from nltk import DependencyGraph
from pyltp import Segmentor
from pyltp import Parser
from pyltp import Postagger
from pyltp import SementicRoleLabeller
def get_args():
    """Parse the command-line options of this demo script.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying:
        ``text`` - the sentence to analyse, and
        ``ltp_data_dir`` - the directory the LTP model files are read from.
    """
    # Default model location, resolved relative to the project root.
    default_ltp_dir = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix()

    cli = argparse.ArgumentParser()
    # Other sample sentences can be supplied on the command line via --text.
    cli.add_argument(
        "--text",
        type=str,
        default="元芳你怎么看?我就趴窗口上看呗!",
    )
    cli.add_argument(
        "--ltp_data_dir",
        type=str,
        default=default_ltp_dir,
    )
    return cli.parse_args()
def main():
    """
    Run dependency parsing on the ``--text`` sentence and draw the tree.

    The sentence is segmented, POS-tagged and parsed with the pyltp models
    found under ``--ltp_data_dir``. The result is written as one
    tab-separated line per word (word, POS tag, head index, relation),
    handed to NLTK's ``DependencyGraph`` and displayed with ``Tree.draw()``.

    Every model that was successfully loaded is released before the
    function exits, including when a later step raises.

    Reference:
    https://www.freesion.com/article/91401299576/
    """
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')
    srl_model_path = os.path.join(args.ltp_data_dir, 'pisrl_win.model')

    # Models are recorded as soon as they are constructed so the ``finally``
    # clause releases exactly the ones that exist, even on a partial load.
    loaded_models = []
    try:
        segmentor = Segmentor(cws_model_path)
        loaded_models.append(segmentor)
        pos_tagger = Postagger(pos_model_path)
        loaded_models.append(pos_tagger)
        parser = Parser(parser_model_path)
        loaded_models.append(parser)
        # The semantic-role labeller is still loaded as before, although
        # nothing below uses it; tracking it here ensures it is released.
        loaded_models.append(SementicRoleLabeller(srl_model_path))

        # Only the analysis itself is timed, not the model loading.
        time_begin = time.time()

        words = segmentor.segment(args.text)
        print(list(words))

        postags = pos_tagger.postag(words)
        print(list(postags))

        arcs = parser.parse(words, postags)
        cost = time.time() - time_begin
        print("cost: {}".format(cost))

        rows = []
        for word, postag, arc in zip(words, postags, arcs):
            head = arc[0]
            relation = arc[1]
            # A word whose head index is 0 is labelled as the tree root.
            if head == 0:
                relation = "ROOT"
            rows.append(
                "\t{word}({relation}/{postag})\t{postag}\t{head}\t{relation}\n".format(
                    word=word,
                    relation=relation,
                    postag=postag,
                    head=head,
                )
            )
        tree_str = "".join(rows)
        print(tree_str)

        conlltree = DependencyGraph(tree_str=tree_str)
        tree = conlltree.tree()
        tree.draw()
    finally:
        for model in loaded_models:
            model.release()
# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|