ybelkada committed on
Commit
9b14454
·
verified ·
1 Parent(s): 16a3d81

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +30 -30
  2. tokenizer_config.json +24 -24
tokenizer.json CHANGED
@@ -2073,6 +2073,33 @@
2073
  "normalized": false,
2074
  "special": true
2075
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2076
  {
2077
  "id": 233,
2078
  "content": ">>UNUSED_221<<",
@@ -4691,33 +4718,6 @@
4691
  "rstrip": false,
4692
  "normalized": false,
4693
  "special": true
4694
- },
4695
- {
4696
- "id": 32768,
4697
- "content": "<|file_sep|>",
4698
- "single_word": false,
4699
- "lstrip": false,
4700
- "rstrip": false,
4701
- "normalized": false,
4702
- "special": true
4703
- },
4704
- {
4705
- "id": 32769,
4706
- "content": "<|repo_name|>",
4707
- "single_word": false,
4708
- "lstrip": false,
4709
- "rstrip": false,
4710
- "normalized": false,
4711
- "special": true
4712
- },
4713
- {
4714
- "id": 32770,
4715
- "content": "<|repo_tree|>",
4716
- "single_word": false,
4717
- "lstrip": false,
4718
- "rstrip": false,
4719
- "normalized": false,
4720
- "special": true
4721
  }
4722
  ],
4723
  "normalizer": null,
@@ -4999,9 +4999,9 @@
4999
  "<|im_start|>": 227,
5000
  "<|im_end|>": 228,
5001
  "<|system|>": 229,
5002
- ">>UNUSED_218<<": 230,
5003
- ">>UNUSED_219<<": 231,
5004
- ">>UNUSED_220<<": 232,
5005
  ">>UNUSED_221<<": 233,
5006
  ">>UNUSED_222<<": 234,
5007
  ">>UNUSED_223<<": 235,
 
2073
  "normalized": false,
2074
  "special": true
2075
  },
2076
+ {
2077
+ "id": 230,
2078
+ "content": "<|file_sep|>",
2079
+ "single_word": false,
2080
+ "lstrip": false,
2081
+ "rstrip": false,
2082
+ "normalized": false,
2083
+ "special": true
2084
+ },
2085
+ {
2086
+ "id": 231,
2087
+ "content": "<|repo_name|>",
2088
+ "single_word": false,
2089
+ "lstrip": false,
2090
+ "rstrip": false,
2091
+ "normalized": false,
2092
+ "special": true
2093
+ },
2094
+ {
2095
+ "id": 232,
2096
+ "content": "<|repo_tree|>",
2097
+ "single_word": false,
2098
+ "lstrip": false,
2099
+ "rstrip": false,
2100
+ "normalized": false,
2101
+ "special": true
2102
+ },
2103
  {
2104
  "id": 233,
2105
  "content": ">>UNUSED_221<<",
 
4718
  "rstrip": false,
4719
  "normalized": false,
4720
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4721
  }
4722
  ],
4723
  "normalizer": null,
 
4999
  "<|im_start|>": 227,
5000
  "<|im_end|>": 228,
5001
  "<|system|>": 229,
5002
+ "<|file_sep|>": 230,
5003
+ "<|repo_name|>": 231,
5004
+ "<|repo_tree|>": 232,
5005
  ">>UNUSED_221<<": 233,
5006
  ">>UNUSED_222<<": 234,
5007
  ">>UNUSED_223<<": 235,
tokenizer_config.json CHANGED
@@ -1840,6 +1840,30 @@
1840
  "single_word": false,
1841
  "special": true
1842
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1843
  "233": {
1844
  "content": ">>UNUSED_221<<",
1845
  "lstrip": false,
@@ -4167,30 +4191,6 @@
4167
  "rstrip": false,
4168
  "single_word": false,
4169
  "special": true
4170
- },
4171
- "32768": {
4172
- "content": "<|file_sep|>",
4173
- "lstrip": false,
4174
- "normalized": false,
4175
- "rstrip": false,
4176
- "single_word": false,
4177
- "special": true
4178
- },
4179
- "32769": {
4180
- "content": "<|repo_name|>",
4181
- "lstrip": false,
4182
- "normalized": false,
4183
- "rstrip": false,
4184
- "single_word": false,
4185
- "special": true
4186
- },
4187
- "32770": {
4188
- "content": "<|repo_tree|>",
4189
- "lstrip": false,
4190
- "normalized": false,
4191
- "rstrip": false,
4192
- "single_word": false,
4193
- "special": true
4194
  }
4195
  },
4196
  "additional_special_tokens": [
 
1840
  "single_word": false,
1841
  "special": true
1842
  },
1843
+ "230": {
1844
+ "content": "<|file_sep|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "231": {
1852
+ "content": "<|repo_name|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "232": {
1860
+ "content": "<|repo_tree|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
  "233": {
1868
  "content": ">>UNUSED_221<<",
1869
  "lstrip": false,
 
4191
  "rstrip": false,
4192
  "single_word": false,
4193
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4194
  }
4195
  },
4196
  "additional_special_tokens": [