| from argparse import ArgumentParser | |
| from diaparser.parsers import Parser | |
| from Tokenizer.src.tokenizer import split_into_sentences | |
# Command-line interface. Both arguments are needed downstream
# (Parser.load and the input-file open), so mark them required to get a
# clear argparse usage error instead of passing None along and crashing.
parser = ArgumentParser(
    description='Parse a tokenized text file with a pretrained diaparser model.'
)
parser.add_argument('--parser', required=True,
                    help='name or path of the pretrained diaparser model to load')
parser.add_argument('--infile', required=True,
                    help='path to a UTF-8 text file to parse, one unit per line')
args = parser.parse_args()

# Load the dependency-parsing model once at startup; reused for all input.
PARSER = Parser.load(args.parser)
def read_test_file(file):
    """Yield one flat token list per line of *file*.

    Each line is passed through the tokenizer's sentence splitter, the
    resulting sentences are re-joined with single spaces, and the whole
    line is split on whitespace, yielding a flat list of token strings
    suitable as one input unit for ``Parser.predict``.

    Args:
        file: path to a UTF-8 encoded text file, one unit per line.

    Yields:
        list[str]: the whitespace-separated tokens of each line.
    """
    with open(file, 'r', encoding='utf-8') as infile:
        for line in infile:
            # str.split() already returns the token list; the original
            # wrapped it in a redundant identity list comprehension.
            yield ' '.join(split_into_sentences(line)).split()
# Materialise all token lists, run the parser over them in one batch
# (with probabilities attached), and print every parsed sentence.
token_lists = list(read_test_file(args.infile))
dataset = PARSER.predict(token_lists, prob=True)
for parsed_sentence in dataset.sentences:
    print(parsed_sentence)