#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Run a pyltp pipeline on one sentence and print its semantic role labels.

The script segments the input text, tags parts of speech, parses
dependencies, labels semantic roles, and prints one tab-separated line per
labelled role.
"""
import argparse
from contextlib import ExitStack
import os

from pyltp import Parser, Postagger, Segmentor, SementicRoleLabeller

from project_settings import project_path


def get_args() -> argparse.Namespace:
    """Parse the command-line arguments.

    Returns:
        Namespace with ``text`` (the sentence to analyse) and
        ``ltp_data_dir`` (directory that holds the LTP model files).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        default="元芳你怎么看?我就趴窗口上看呗!",
        type=str
    )
    parser.add_argument(
        "--ltp_data_dir",
        default=(project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix(),
        type=str
    )
    args = parser.parse_args()
    return args


def _format_role(role, words) -> str:
    """Render one labelled role as a tab-separated ``text/LABEL`` row.

    Args:
        role: Pair ``(index, arguments)`` where ``arguments`` is an iterable
            of ``(name, (start, end))`` items with inclusive word indices.
        words: Segmented words that the indices refer to.

    Returns:
        The row text; every item, including the last one, is followed by a
        tab character.
    """
    index = role[0]
    # The role's own word is added as a one-word span labelled "INDEX" so
    # that it is printed in position among its arguments.
    spans = [("INDEX", (index, index)), *role[1]]
    # Order the spans by their end index (stable for equal end indices).
    spans = sorted(spans, key=lambda span: span[1][1])
    return "".join(
        f"{''.join(words[start:end + 1])}/{name}\t"
        for name, (start, end) in spans
    )


def main() -> None:
    """Load the models, analyse ``--text`` and print its semantic roles."""
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')
    # NOTE(review): the file name suggests a Windows build of the SRL model;
    # confirm which file is needed on other platforms.
    srl_model_path = os.path.join(args.ltp_data_dir, 'pisrl_win.model')

    # Each release() is registered right after its model is loaded, so every
    # loaded model is released even if a later load or processing step
    # raises. Callbacks run in reverse registration order.
    with ExitStack() as stack:
        segmentor = Segmentor(cws_model_path)
        stack.callback(segmentor.release)
        pos_tagger = Postagger(pos_model_path)
        stack.callback(pos_tagger.release)
        parser = Parser(parser_model_path)
        stack.callback(parser.release)
        srl_labeler = SementicRoleLabeller(srl_model_path)
        stack.callback(srl_labeler.release)

        words = segmentor.segment(args.text)
        postags = pos_tagger.postag(words)
        arcs = parser.parse(words, postags)
        roles = srl_labeler.label(words, postags, arcs)

        for role in roles:
            print(_format_role(role, words))


if __name__ == "__main__":
    main()