goryden committed on
Commit
a665cb6
·
verified ·
1 Parent(s): ea9726c

Update README.md

Browse files

from transformers import T5Tokenizer
# Round-trip demo: tokenize, encode and decode a short Mongolian phrase
# with a T5Tokenizer built from a local SentencePiece model file.
text = "сайн уу"

# Build the tokenizer directly from the SentencePiece model file on disk.
mn_tokenizer = T5Tokenizer(vocab_file="mn_tokenizer.model")

# Subword pieces, for inspecting how the text is segmented.
tokens = mn_tokenizer.tokenize(text)
# Token ids; encode() adds the tokenizer's special tokens by default.
encoded = mn_tokenizer.encode(text)
# Drop special tokens on the way back so the decoded text is comparable
# to the original instead of carrying a trailing end-of-sequence marker.
decoded = mn_tokenizer.decode(encoded, skip_special_tokens=True)

print("Original:", text)
print("Mongolian tokenizer tokens:", tokens)
print("Decoded :", decoded)

Files changed (1) hide show
  1. README.md +0 -12
README.md CHANGED
@@ -1,12 +0,0 @@
1
- from transformers import T5Tokenizer
2
- text = "сайн уу"
3
-
4
- mn_tokenizer = T5Tokenizer(vocab_file="mn_tokenizer.model")
5
-
6
- tokens = mn_tokenizer.tokenize(text)
7
- encoded = mn_tokenizer.encode(text)
8
- decoded = mn_tokenizer.decode(encoded)
9
-
10
- print("Original:", text)
11
- print("Mongolian tokenizer tokens:", tokens)
12
- print("Decoded :", decoded)