HoneyTian's picture
update
b328553
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
from pyltp import Parser, Postagger, Segmentor, SementicRoleLabeller
from project_settings import project_path
def get_args():
    """Parse the command-line options of this demo script.

    Options:
        --text: the sentence to analyse; defaults to a built-in Chinese
            sample sentence.
        --ltp_data_dir: directory the LTP model files are loaded from;
            defaults to ``data/pyltp_models/ltp_data_v3.4.0`` below
            ``project_path``, rendered with forward slashes.

    Returns:
        The namespace returned by ``ArgumentParser.parse_args()``.
    """
    default_model_dir = project_path / "data/pyltp_models/ltp_data_v3.4.0"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--text",
        type=str,
        default="元芳你怎么看?我就趴窗口上看呗!",
    )
    arg_parser.add_argument(
        "--ltp_data_dir",
        type=str,
        default=default_model_dir.as_posix(),
    )
    return arg_parser.parse_args()
def _format_role(words, role):
    """Render one labelled predicate as a row of ``text/LABEL`` cells.

    ``role`` is read as ``(index, arguments)`` where ``arguments`` is a list
    of ``(name, (start, end))`` items and ``end`` is an inclusive word index.
    The predicate itself is added as a pseudo-argument named ``INDEX`` and
    all cells are ordered by their end position. Every cell, including the
    last one, is terminated by a tab.
    """
    index = role[0]
    spans = [("INDEX", (index, index))] + role[1]
    spans.sort(key=lambda span: span[1][1])

    cells = []
    for span in spans:
        name = span[0]
        start = span[1][0]
        end = span[1][1]
        arg_text = "".join(words[start:end + 1])
        cells.append(f"{arg_text}/{name}\t")
    return "".join(cells)


def main():
    """Run segmentation, POS tagging, parsing and SRL on ``--text``.

    Loads the four pyltp models from ``--ltp_data_dir``, runs the pipeline
    on the input sentence and prints one row per labelled predicate.
    Every model that was successfully loaded is released before returning,
    even when a later step raises.
    """
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')
    # NOTE(review): 'pisrl_win.model' looks like the Windows build of the SRL
    # model -- confirm it is the right file for the platform this runs on.
    srl_model_path = os.path.join(args.ltp_data_dir, 'pisrl_win.model')

    # Track models as they are loaded so that a failure half-way through
    # (missing model file, inference error, ...) still releases the ones
    # that already hold native resources.
    loaded_models = []
    try:
        segmentor = Segmentor(cws_model_path)
        loaded_models.append(segmentor)
        pos_tagger = Postagger(pos_model_path)
        loaded_models.append(pos_tagger)
        parser = Parser(parser_model_path)
        loaded_models.append(parser)
        srl_labeler = SementicRoleLabeller(srl_model_path)
        loaded_models.append(srl_labeler)

        words = segmentor.segment(args.text)
        postags = pos_tagger.postag(words)
        arcs = parser.parse(words, postags)
        roles = srl_labeler.label(words, postags, arcs)

        for role in roles:
            print(_format_role(words, role))
    finally:
        # Same release order as loading: segmentor, tagger, parser, labeller.
        for model in loaded_models:
            model.release()
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()