File size: 987 Bytes
366b225 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
from conllu import parse
from io import open
from conllu import parse_incr
# from sklearn.externals import joblib
# import cloudpickle
import dill
# with open('my_tagger.dill', 'rb') as f:
# tagger = dill.load(f)
# print('Model Loaded')
# hmm_tagger.tag(sent)
def test_fn(test_sent, model_path='models/tnt_pos_tagger.dill'):
    """Tag a tokenized sentence with a dill-serialized tagger.

    Args:
        test_sent: The tokens to tag; passed unchanged to the loaded
            tagger's ``tag`` method.
        model_path: Location of the dill file holding the tagger. Defaults
            to the path that was previously hard-coded, so existing
            single-argument calls behave as before.

    Returns:
        A list holding the second element of every pair returned by
        ``tagger.tag`` (the tag, with the token dropped).
    """
    # NOTE: dill is pickle-based, so loading a file can run arbitrary code.
    # Only point model_path at files from a trusted source.
    # The tagger is re-read from disk on every call.
    with open(model_path, 'rb') as model_file:
        tagger = dill.load(model_file)

    tagged_pairs = tagger.tag(test_sent)
    return [tag for _, tag in tagged_pairs]
def main():
    """Tag one sentence from the Tamil UD test file and print the result.

    Parses ``UD_Tamil-TTB-master/ta_ttb-ud-test.conllu``, takes the sentence
    at index 10, prints its ``form`` values, then prints the tags that
    ``test_fn`` returns for them.

    Raises:
        IndexError: If the file contains fewer than 11 sentences.
    """
    sentence_index = 10

    # parse_incr reads lazily from the handle, so the list must be built
    # while the file is still open. The with-block then closes the file,
    # which the previous bare open() call never did.
    with open("UD_Tamil-TTB-master/ta_ttb-ud-test.conllu", "r", encoding="utf-8") as data_file:
        test_sentences = list(parse_incr(data_file))

    if len(test_sentences) <= sentence_index:
        # Same exception type as the bare index lookup, with a clearer message.
        raise IndexError(
            "expected at least %d sentences in the test file, found %d"
            % (sentence_index + 1, len(test_sentences))
        )

    test1 = [token['form'] for token in test_sentences[sentence_index]]
    print('input:\n', test1)
    print(test_fn(test1))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|