|
|
from conllu import parse |
|
|
from io import open |
|
|
from conllu import parse_incr |
|
|
|
|
|
|
|
|
import dill |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_fn(test_sent):
    """Tag a tokenized sentence with the serialized tagger and return only the tags.

    The tagger object is deserialized from ``models/tnt_pos_tagger.dill`` on
    every call (presumably an NLTK TnT tagger, judging by the file name --
    TODO confirm).

    Args:
        test_sent: The sentence to tag; passed unchanged to ``tagger.tag``.

    Returns:
        A list holding the second element of each pair produced by
        ``tagger.tag(test_sent)``, in the same order.
    """
    # NOTE(review): dill.load can execute arbitrary code during
    # deserialization; only load model files from a trusted source.
    with open('models/tnt_pos_tagger.dill', 'rb') as model_file:
        pos_tagger = dill.load(model_file)

    tagged_pairs = pos_tagger.tag(test_sent)

    # Drop the first element of each pair and keep the assigned tag.
    return [label for _word, label in tagged_pairs]
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Tag one sentence from the CoNLL-U test file and print the result.

    Parses ``UD_Tamil-TTB-master/ta_ttb-ud-test.conllu``, collects the
    ``form`` field of every token in the sentence at index 10, prints that
    token list, and then prints whatever ``test_fn`` returns for it.

    Raises:
        IndexError: If the file contains fewer than 11 sentences.
    """
    # Use a context manager so the file handle is always closed; the handle
    # was previously opened and never released. The sentences are
    # materialized inside the block because parse_incr reads from the open
    # handle while it is being iterated.
    with open("UD_Tamil-TTB-master/ta_ttb-ud-test.conllu", "r", encoding="utf-8") as data_file:
        test_sentences = list(parse_incr(data_file))

    test1 = [token['form'] for token in test_sentences[10]]
    print('input:\n', test1)
    print(test_fn(test1))
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|