goryden committed on
Commit
8d69de2
·
verified ·
1 Parent(s): 21c5219

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +14 -1
README.md CHANGED
@@ -4,4 +4,17 @@ datasets:
4
  language:
5
  - mn
6
  ---
7
- usage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  language:
5
  - mn
6
  ---
7
+ ## Usage
8
+ ```python
9
+ from transformers import T5Tokenizer
10
+ text = "сайн уу"
11
+
12
+ mn_tokenizer = T5Tokenizer(vocab_file="mn_tokenizer.model")
13
+
14
+ tokens = mn_tokenizer.tokenize(text)
15
+ encoded = mn_tokenizer.encode(text)
16
+ decoded = mn_tokenizer.decode(encoded)
17
+
18
+ print("Original:", text)
19
+ print("Mongolian tokenizer tokens:", tokens)
20
+ print("Decoded :", decoded)