YongganFu committed
Commit f203027 · verified · Parent: 3357a8e

Upload tokenizer

Files changed (2):
  1. tokenizer.json (+2 -2)
  2. tokenizer_config.json (+4 -9)
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3277c00fe5fb3963b3cb7c07b7f183722d2af4d775a4aea7cfb3684d7cccbc2f
-size 17078330
+oid sha256:b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2
+size 17078292
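
The tokenizer.json change only swaps the Git LFS pointer: a new object hash and a slightly smaller payload (17,078,292 bytes, down from 17,078,330). A minimal sketch for checking that a locally downloaded tokenizer.json matches the new pointer; the file path is a placeholder:

```python
import hashlib
from pathlib import Path

# Expected values taken from the new LFS pointer in this commit.
EXPECTED_SHA256 = "b0240ce510f08e6c2041724e9043e33be9d251d1e4a4d94eb68cd47b954b61d2"
EXPECTED_SIZE = 17078292

data = Path("tokenizer.json").read_bytes()  # placeholder path

assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)} bytes"
assert hashlib.sha256(data).hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("tokenizer.json matches the LFS pointer from this commit")
```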
tokenizer_config.json CHANGED
@@ -84,7 +84,7 @@
       "special": true
     },
     "10": {
-      "content": "<SPECIAL_10>",
+      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -92,7 +92,7 @@
       "special": true
     },
     "11": {
-      "content": "<SPECIAL_11>",
+      "content": "[PREFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -100,7 +100,7 @@
       "special": true
     },
     "12": {
-      "content": "<SPECIAL_12>",
+      "content": "[MIDDLE]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -108,7 +108,7 @@
       "special": true
     },
     "13": {
-      "content": "<SPECIAL_13>",
+      "content": "[SUFFIX]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -8005,14 +8005,9 @@
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{{'<SPECIAL_10>System'}}{% for message in messages %}{% if message['role'] == 'system' %}{{'\n' + message['content'].strip()}}{% endif %}{% endfor %}{{'\n'}}{% for message in messages %}{% if message['role'] == 'user' %}{{ '\n<SPECIAL_11>User\n' + message['content'].strip() + '\n<SPECIAL_11>Assistant\n' }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_special_tokens": {},
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
   "model_max_length": 1000000000000000019884624838656,
   "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "<unk>"
 