goryden committed on
Commit
ea9726c
·
verified ·
1 Parent(s): 6b244ee

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -0
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import T5Tokenizer
2
+ text = "сайн уу"
3
+
4
+ mn_tokenizer = T5Tokenizer(vocab_file="mn_tokenizer.model")
5
+
6
+ tokens = mn_tokenizer.tokenize(text)
7
+ encoded = mn_tokenizer.encode(text)
8
+ decoded = mn_tokenizer.decode(encoded)
9
+
10
+ print("Original:", text)
11
+ print("Mongolian tokenizer tokens:", tokens)
12
+ print("Decoded :", decoded)