File size: 965 Bytes
#!/usr/bin/env python3
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
"""Reduce an ngrams file for training nplm to a smaller version of it.
The smaller version will have fewer ngrams.
"""
from sys import argv
def reduce_line(line, start_idx, ngrams):
    """Return the token window of *line* as one output line.

    The line is split on whitespace and the slice
    ``tokens[start_idx:start_idx + ngrams]`` is joined with single spaces and
    terminated with a newline.  A line with no tokens in that window yields
    just ``"\\n"``, so the output keeps one line per input line.
    """
    tokens = line.split()
    return " ".join(tokens[start_idx:start_idx + ngrams]) + "\n"


def reduce_file(in_path, out_path, start_idx, ngrams):
    """Write the reduced form of every line of *in_path* to *out_path*.

    Both files are opened in text mode and are closed even if reading or
    writing fails part-way through.
    """
    with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
        outfile.writelines(
            reduce_line(line, start_idx, ngrams) for line in infile)


def main(args):
    """Run the script on an argv-style list and return the exit status.

    *args* is the full argument vector, program name first:
    ``[prog, INFILE, OUTFILE, START_IDX, NGRAMS]``.

    Returns 0 on success, or 1 after printing the usage message when the
    number of arguments is wrong.  Raises ValueError if START_IDX or NGRAMS
    is not an integer.
    """
    if len(args) != 5:
        print("Wrong number of args, got: " + str(len(args) - 1) + " expected 4.")
        print("Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS")
        return 1
    # Parse the numeric arguments before touching the files, so that a bad
    # number cannot truncate an existing OUTFILE.
    start_idx = int(args[3])
    ngrams = int(args[4])
    reduce_file(args[1], args[2], start_idx, ngrams)
    return 0


if __name__ == "__main__":
    # SystemExit carries the status to the shell without relying on the
    # site-provided exit() helper.
    raise SystemExit(main(argv))
|