Upload 4 files
Browse files
Switch to tiktoken encoding
- README.md +0 -1
- config.json +1 -1
README.md
CHANGED
|
@@ -3,7 +3,6 @@
|
|
| 3 |
ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
|
| 4 |
|
| 5 |
## Usage
|
| 6 |
-
|
| 7 |
```python
|
| 8 |
from tokenizers.implementations import ByteLevelBPETokenizer
|
| 9 |
from tokenizers.processors import BertProcessing
|
|
|
|
| 3 |
ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
|
| 4 |
|
| 5 |
## Usage
|
|
|
|
| 6 |
```python
|
| 7 |
from tokenizers.implementations import ByteLevelBPETokenizer
|
| 8 |
from tokenizers.processors import BertProcessing
|
config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"vocab_size": 52000,
|
| 3 |
"model_type": "gpt2",
|
|
|
|
| 4 |
"min_frequency": 2,
|
| 5 |
"special_tokens": [
|
| 6 |
"<s>",
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"model_type": "gpt2",
|
| 3 |
+
"vocab_size": 52000,
|
| 4 |
"min_frequency": 2,
|
| 5 |
"special_tokens": [
|
| 6 |
"<s>",
|