doc(readme): add missing dataset
Browse files
README.md
CHANGED
|
@@ -3,6 +3,7 @@ license: isc
|
|
| 3 |
datasets:
|
| 4 |
- HuggingFaceFW/fineweb
|
| 5 |
- HuggingFaceFW/fineweb-2
|
|
|
|
| 6 |
language:
|
| 7 |
- fr
|
| 8 |
- en
|
|
@@ -42,4 +43,4 @@ from multistral.multistraltokenizer import MultistralTokenizer
|
|
| 42 |
tokenizer = MultistralTokenizer.from_pretrained("models/aizia_tokenizer")
|
| 43 |
tokens = tokenizer.encode("Your text here")
|
| 44 |
text = tokenizer.decode(tokens)
|
| 45 |
-
```
|
|
|
|
| 3 |
datasets:
|
| 4 |
- HuggingFaceFW/fineweb
|
| 5 |
- HuggingFaceFW/fineweb-2
|
| 6 |
+
- nick007x/github-code-2025
|
| 7 |
language:
|
| 8 |
- fr
|
| 9 |
- en
|
|
|
|
| 43 |
tokenizer = MultistralTokenizer.from_pretrained("models/aizia_tokenizer")
|
| 44 |
tokens = tokenizer.encode("Your text here")
|
| 45 |
text = tokenizer.decode(tokens)
|
| 46 |
+
```
|