gsaltintas committed on
Commit
3f63eed
·
verified ·
1 Parent(s): 0f1c598

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -47,4 +47,4 @@ tokens = tokenizer.encode("Hello, world!")
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
- | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `92, 76, 85, 80, 76, 224, 76, 78, 76, 3, 71, 82, 78, 88, 93, 32, 82, 87, 88, 93` |
 
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
+ | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 3, 70, 81, 77, 87, 92, 4, 81, 86, 87, 92` |
merges.txt CHANGED
@@ -1739,3 +1739,4 @@
1739
  ['ettusindeĠogĠsekshundredeĠogĠ', 'treds']
1740
  ['ettusindeĠogĠsekshundredeĠogĠ', 'enogtreds']
1741
  ['ettusindeĠogĠsekshundredeĠogĠ', 'toogtreds']
 
 
1739
  ['ettusindeĠogĠsekshundredeĠogĠ', 'treds']
1740
  ['ettusindeĠogĠsekshundredeĠogĠ', 'enogtreds']
1741
  ['ettusindeĠogĠsekshundredeĠogĠ', 'toogtreds']
1742
+ ['ettusindeĠogĠsekshundredeĠogĠ', 'treogtreds']
special_tokens_map.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "additional_special_tokens": [
3
  "+",
4
- "\\="
5
  ],
6
  "bos_token": "<s>",
7
  "eos_token": "</s>",
 
1
  {
2
  "additional_special_tokens": [
3
  "+",
4
+ "="
5
  ],
6
  "bos_token": "<s>",
7
  "eos_token": "</s>",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -33,7 +33,7 @@
33
  "special": true
34
  },
35
  "4": {
36
- "content": "\\=",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
@@ -43,7 +43,7 @@
43
  },
44
  "additional_special_tokens": [
45
  "+",
46
- "\\="
47
  ],
48
  "bos_token": "<s>",
49
  "clean_up_tokenization_spaces": false,
 
33
  "special": true
34
  },
35
  "4": {
36
+ "content": "=",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
43
  },
44
  "additional_special_tokens": [
45
  "+",
46
+ "="
47
  ],
48
  "bos_token": "<s>",
49
  "clean_up_tokenization_spaces": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff