Update README.md
#1
by
goryden
- opened
from transformers import T5Tokenizer
# Round-trip demo: tokenize, encode, and decode a Mongolian sample sentence
# with the SentencePiece model stored in "mn_tokenizer.model".
text = "сайн уу"

# Load the tokenizer directly from the local SentencePiece vocabulary file.
mn_tokenizer = T5Tokenizer(vocab_file="mn_tokenizer.model")

# Sub-word pieces (strings) produced for the sample text.
tokens = mn_tokenizer.tokenize(text)

# Token ids; encode() adds special tokens by default, so for T5 the
# end-of-sequence token is appended to the id list.
encoded = mn_tokenizer.encode(text)

# Skip special tokens while decoding so the result is comparable with the
# original text instead of carrying a trailing "</s>" marker.
decoded = mn_tokenizer.decode(encoded, skip_special_tokens=True)

print("Original:", text)
print("Mongolian tokenizer tokens:", tokens)
print("Decoded :", decoded)