Upload folder using huggingface_hub
Browse files
- README.md +1 -1
- merges.txt +1 -0
- special_tokens_map.json +1 -1
- tokenizer.json +0 -0
- tokenizer_config.json +2 -2
- vocab.json +0 -0
README.md
CHANGED
|
@@ -47,4 +47,4 @@ tokens = tokenizer.encode("Hello, world!")
|
|
| 47 |
## Sample Encoding
|
| 48 |
| Text | Tokens | Token IDs |
|
| 49 |
|------|--------|-----------|
|
| 50 |
-
| `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `
|
|
|
|
| 47 |
## Sample Encoding
|
| 48 |
| Text | Tokens | Token IDs |
|
| 49 |
|------|--------|-----------|
|
| 50 |
+
| `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 3, 70, 81, 77, 87, 92, 4, 81, 86, 87, 92` |
|
merges.txt
CHANGED
|
@@ -1739,3 +1739,4 @@
|
|
| 1739 |
['ettusindeĠogĠsekshundredeĠogĠ', 'treds']
|
| 1740 |
['ettusindeĠogĠsekshundredeĠogĠ', 'enogtreds']
|
| 1741 |
['ettusindeĠogĠsekshundredeĠogĠ', 'toogtreds']
|
|
|
|
|
|
| 1739 |
['ettusindeĠogĠsekshundredeĠogĠ', 'treds']
|
| 1740 |
['ettusindeĠogĠsekshundredeĠogĠ', 'enogtreds']
|
| 1741 |
['ettusindeĠogĠsekshundredeĠogĠ', 'toogtreds']
|
| 1742 |
+
['ettusindeĠogĠsekshundredeĠogĠ', 'treogtreds']
|
special_tokens_map.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
"+",
|
| 4 |
-
"
|
| 5 |
],
|
| 6 |
"bos_token": "<s>",
|
| 7 |
"eos_token": "</s>",
|
|
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
"+",
|
| 4 |
+
"="
|
| 5 |
],
|
| 6 |
"bos_token": "<s>",
|
| 7 |
"eos_token": "</s>",
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -33,7 +33,7 @@
|
|
| 33 |
"special": true
|
| 34 |
},
|
| 35 |
"4": {
|
| 36 |
-
"content": "
|
| 37 |
"lstrip": false,
|
| 38 |
"normalized": false,
|
| 39 |
"rstrip": false,
|
|
@@ -43,7 +43,7 @@
|
|
| 43 |
},
|
| 44 |
"additional_special_tokens": [
|
| 45 |
"+",
|
| 46 |
-
"
|
| 47 |
],
|
| 48 |
"bos_token": "<s>",
|
| 49 |
"clean_up_tokenization_spaces": false,
|
|
|
|
| 33 |
"special": true
|
| 34 |
},
|
| 35 |
"4": {
|
| 36 |
+
"content": "=",
|
| 37 |
"lstrip": false,
|
| 38 |
"normalized": false,
|
| 39 |
"rstrip": false,
|
|
|
|
| 43 |
},
|
| 44 |
"additional_special_tokens": [
|
| 45 |
"+",
|
| 46 |
+
"="
|
| 47 |
],
|
| 48 |
"bos_token": "<s>",
|
| 49 |
"clean_up_tokenization_spaces": false,
|
vocab.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|