Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| import argparse | |
| import os | |
| from pyltp import Parser, Postagger, Segmentor, SementicRoleLabeller | |
| from project_settings import project_path | |
def get_args():
    """Parse the command-line options for the semantic-role-labelling demo.

    Options:
        --text: the sentence to analyse (a sample sentence by default).
        --ltp_data_dir: directory containing the LTP model files; defaults
            to ``data/pyltp_models/ltp_data_v3.4.0`` under ``project_path``.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    default_model_dir = project_path / "data/pyltp_models/ltp_data_v3.4.0"

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--text",
        type=str,
        default="元芳你怎么看?我就趴窗口上看呗!",
    )
    cli.add_argument(
        "--ltp_data_dir",
        type=str,
        default=default_model_dir.as_posix(),
    )
    return cli.parse_args()
def main():
    """Run the LTP pipeline on ``--text`` and print its semantic roles.

    The text is segmented, POS-tagged and dependency-parsed, and the results
    are passed to the semantic role labeller. For every labelled role one
    line is printed, made of tab-terminated ``<words>/<label>`` fields
    ordered by the end position of their word span. The role's own word is
    emitted as a one-word span under the label ``INDEX``.

    Every model that was successfully loaded is released on exit, including
    when a later model fails to load or a pipeline stage raises.
    """
    args = get_args()
    model_dir = args.ltp_data_dir

    # Track models as they are loaded so the ``finally`` clause releases
    # exactly the ones that exist, in the order they were created.
    loaded = []
    try:
        segmentor = Segmentor(os.path.join(model_dir, 'cws.model'))
        loaded.append(segmentor)
        pos_tagger = Postagger(os.path.join(model_dir, 'pos.model'))
        loaded.append(pos_tagger)
        parser = Parser(os.path.join(model_dir, 'parser.model'))
        loaded.append(parser)
        # NOTE(review): 'pisrl_win.model' appears to be the Windows build of
        # the SRL model - confirm it is the right file on other platforms.
        srl_labeler = SementicRoleLabeller(
            os.path.join(model_dir, 'pisrl_win.model')
        )
        loaded.append(srl_labeler)

        words = segmentor.segment(args.text)
        postags = pos_tagger.postag(words)
        arcs = parser.parse(words, postags)
        roles = srl_labeler.label(words, postags, arcs)

        for role in roles:
            # role[0] is a word index; role[1] is a list of
            # (label, (start, end)) entries with inclusive word spans.
            index = role[0]
            spans = [("INDEX", (index, index))] + role[1]
            # Stable sort: spans ending on the same word keep their
            # relative order, with INDEX first.
            spans.sort(key=lambda span: span[1][1])

            fields = []
            for name, (start, end) in spans:
                arg_text = "".join(words[start:end + 1])
                fields.append(f"{arg_text}/{name}\t")
            print("".join(fields))
    finally:
        for model in loaded:
            model.release()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()