pmranu committed on
Commit
a833302
·
verified ·
1 Parent(s): dced1e4

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -36,5 +36,6 @@
36
  " ": 50260,
37
  " ": 50259,
38
  " ": 50258,
39
- " ": 50257
 
40
  }
 
36
  " ": 50260,
37
  " ": 50259,
38
  " ": 50258,
39
+ " ": 50257,
40
+ "[PAD]": 50295
41
  }
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -314,6 +314,14 @@
314
  "rstrip": false,
315
  "single_word": false,
316
  "special": false
 
 
 
 
 
 
 
 
317
  }
318
  },
319
  "bos_token": "<|endoftext|>",
@@ -322,7 +330,7 @@
322
  "errors": "replace",
323
  "extra_special_tokens": {},
324
  "model_max_length": 2048,
325
- "pad_token": "<|endoftext|>",
326
  "padding_side": "left",
327
  "return_token_type_ids": false,
328
  "tokenizer_class": "CodeGenTokenizer",
 
314
  "rstrip": false,
315
  "single_word": false,
316
  "special": false
317
+ },
318
+ "50295": {
319
+ "content": "[PAD]",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false,
324
+ "special": true
325
  }
326
  },
327
  "bos_token": "<|endoftext|>",
 
330
  "errors": "replace",
331
  "extra_special_tokens": {},
332
  "model_max_length": 2048,
333
+ "pad_token": "[PAD]",
334
  "padding_side": "left",
335
  "return_token_type_ids": false,
336
  "tokenizer_class": "CodeGenTokenizer",