Raapliii committed on
Commit
22c81d9
·
verified ·
1 Parent(s): 0615f8d

update special token

Browse files

git add tokenizer_config.json

Files changed (1) hide show
  1. tokenizer_config.json +11 -40
tokenizer_config.json CHANGED
@@ -5,46 +5,17 @@
5
  "clean_up_tokenization_spaces": true,
6
  "eos_token": "<|endoftext|>",
7
  "errors": "replace",
8
- "extra_special_tokens": [
9
- "<|endoftext|>",
10
- "<fim_prefix>",
11
- "<fim_middle>",
12
- "<fim_suffix>",
13
- "<fim_pad>",
14
- "<repo_name>",
15
- "<file_sep>",
16
- "<issue_start>",
17
- "<issue_comment>",
18
- "<issue_closed>",
19
- "<jupyter_start>",
20
- "<jupyter_text>",
21
- "<jupyter_code>",
22
- "<jupyter_output>",
23
- "<jupyter_script>",
24
- "<empty_output>",
25
- "<code_to_intermediate>",
26
- "<intermediate_to_code>",
27
- "<pr>",
28
- "<pr_status>",
29
- "<pr_is_merged>",
30
- "<pr_base>",
31
- "<pr_file>",
32
- "<pr_base_code>",
33
- "<pr_diff>",
34
- "<pr_diff_hunk>",
35
- "<pr_comment>",
36
- "<pr_event_id>",
37
- "<pr_review>",
38
- "<pr_review_state>",
39
- "<pr_review_comment>",
40
- "<pr_in_reply_to_review_id>",
41
- "<pr_in_reply_to_comment_id>",
42
- "<pr_diff_hunk_comment_line>",
43
- "<NAME>",
44
- "<EMAIL>",
45
- "<KEY>",
46
- "<PASSWORD>"
47
- ],
48
  "is_local": false,
49
  "local_files_only": false,
50
  "model_max_length": 1000000000000000019884624838656,
 
5
  "clean_up_tokenization_spaces": true,
6
  "eos_token": "<|endoftext|>",
7
  "errors": "replace",
8
+ "extra_special_tokens": {
9
+ "add_prefix_space": false,
10
+ "bos_token": "<|endoftext|>",
11
+ "eos_token": "<|endoftext|>",
12
+ "unk_token": "<|endoftext|>",
13
+ "pad_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "tokenizer_class": "GPT2Tokenizer",
17
+ "vocab_size": 49152
18
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "is_local": false,
20
  "local_files_only": false,
21
  "model_max_length": 1000000000000000019884624838656,