Update README.md
Browse files
README.md
CHANGED
|
@@ -5,6 +5,7 @@ datasets:
|
|
| 5 |
language:
|
| 6 |
- my
|
| 7 |
pipeline_tag: feature-extraction
|
|
|
|
| 8 |
---
|
| 9 |
# DatarrX - myX-Tokenizer-Unigram ⚙️
|
| 10 |
|
|
@@ -39,6 +40,7 @@ If you use this tokenizer in your research or project, please cite it as follows
|
|
| 39 |
Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer (Version 1.0)* [Computer software]. Hugging Face. https://huggingface.co/DatarrX/myX-Tokenizer-Unigram
|
| 40 |
|
| 41 |
### BibTeX
|
|
|
|
| 42 |
@software{khantsintheinn2026unigram,
|
| 43 |
author = {Khant Sint Heinn},
|
| 44 |
title = {myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer},
|
|
@@ -48,6 +50,7 @@ Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script T
|
|
| 48 |
url = {https://huggingface.co/DatarrX/myX-Tokenizer-Unigram},
|
| 49 |
note = {Burmese-only training corpus}
|
| 50 |
}
|
|
|
|
| 51 |
|
| 52 |
---
|
| 53 |
|
|
@@ -103,6 +106,7 @@ print(sp.encode_as_pieces(text))
|
|
| 103 |
Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer (Version 1.0)* [Computer software]. Hugging Face. https://huggingface.co/DatarrX/myX-Tokenizer-Unigram
|
| 104 |
|
| 105 |
### BibTeX
|
|
|
|
| 106 |
@software{khantsintheinn2026unigram,
|
| 107 |
author = {Khant Sint Heinn},
|
| 108 |
title = {myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer},
|
|
@@ -111,4 +115,5 @@ Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script T
|
|
| 111 |
publisher = {Hugging Face},
|
| 112 |
url = {https://huggingface.co/DatarrX/myX-Tokenizer-Unigram},
|
| 113 |
note = {Burmese-only training corpus}
|
| 114 |
-
}
|
|
|
|
|
|
| 5 |
language:
|
| 6 |
- my
|
| 7 |
pipeline_tag: feature-extraction
|
| 8 |
+
new_version: DatarrX/myX-Tokenizer
|
| 9 |
---
|
| 10 |
# DatarrX - myX-Tokenizer-Unigram ⚙️
|
| 11 |
|
|
|
|
| 40 |
Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer (Version 1.0)* [Computer software]. Hugging Face. https://huggingface.co/DatarrX/myX-Tokenizer-Unigram
|
| 41 |
|
| 42 |
### BibTeX
|
| 43 |
+
```bibtex
|
| 44 |
@software{khantsintheinn2026unigram,
|
| 45 |
author = {Khant Sint Heinn},
|
| 46 |
title = {myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer},
|
|
|
|
| 50 |
url = {https://huggingface.co/DatarrX/myX-Tokenizer-Unigram},
|
| 51 |
note = {Burmese-only training corpus}
|
| 52 |
}
|
| 53 |
+
```
|
| 54 |
|
| 55 |
---
|
| 56 |
|
|
|
|
| 106 |
Khant Sint Heinn. (2026). *myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer (Version 1.0)* [Computer software]. Hugging Face. https://huggingface.co/DatarrX/myX-Tokenizer-Unigram
|
| 107 |
|
| 108 |
### BibTeX
|
| 109 |
+
```bibtex
|
| 110 |
@software{khantsintheinn2026unigram,
|
| 111 |
author = {Khant Sint Heinn},
|
| 112 |
title = {myX-Tokenizer-Unigram: Probabilistic Burmese Script Tokenizer},
|
|
|
|
| 115 |
publisher = {Hugging Face},
|
| 116 |
url = {https://huggingface.co/DatarrX/myX-Tokenizer-Unigram},
|
| 117 |
note = {Burmese-only training corpus}
|
| 118 |
+
}
|
| 119 |
+
```
|