Update README.md
Browse files
README.md
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
---
|
| 2 |
library_name: transformers
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
---
|
| 5 |
|
| 6 |
# AmhT5 Tokenizer
|
|
@@ -54,5 +60,4 @@ tokens = TOKENIZER.tokenize("A Tokenizer trained for Amharic language.")
|
|
| 54 |
|
| 55 |
print(len(tokens)) # 7
|
| 56 |
print(tokens)
|
| 57 |
-
# ['▁A', '▁Token', 'izer', '▁trained', '▁for', '▁Amharic', '▁language.']
|
| 58 |
-
|
|
|
|
| 1 |
---
|
| 2 |
library_name: transformers
|
| 3 |
+
license: cc-by-4.0
|
| 4 |
+
datasets:
|
| 5 |
+
- HuggingFaceFW/fineweb
|
| 6 |
+
- castorini/wura
|
| 7 |
+
language:
|
| 8 |
+
- am
|
| 9 |
+
- en
|
| 10 |
---
|
| 11 |
|
| 12 |
# AmhT5 Tokenizer
|
|
|
|
| 60 |
|
| 61 |
print(len(tokens)) # 7
|
| 62 |
print(tokens)
|
| 63 |
+
# ['▁A', '▁Token', 'izer', '▁trained', '▁for', '▁Amharic', '▁language.']
|
|
|