datasets:
- saillab/alpaca-mongolian-cleaned
language:
- mn
## Usage
```python
from transformers import AutoTokenizer

# Demo: round-trip a Mongolian phrase through the custom tokenizer
# (tokenize -> encode to ids -> decode back to text).
tokenizer = AutoTokenizer.from_pretrained("goryden/mn_tokenizer")
sample = "сайн уу"

token_pieces = tokenizer.tokenize(sample)
token_ids = tokenizer.encode(sample)
roundtrip = tokenizer.decode(token_ids)

print("Original:", sample)
print("Mongolian tokenizer tokens:", token_pieces)
print("Decoded :", roundtrip)