Training done
Browse files
- added_tokens.json +0 -1
- special_tokens_map.json +2 -2
- tokenizer.json +0 -9
- tokenizer_config.json +2 -10
added_tokens.json
CHANGED
|
@@ -12,7 +12,6 @@
|
|
| 12 |
"<s_birth_date>": 57531,
|
| 13 |
"<s_cnh>": 57541,
|
| 14 |
"<s_cpf>": 57533,
|
| 15 |
-
"<s_docparse>": 57545,
|
| 16 |
"<s_expedition_date>": 57539,
|
| 17 |
"<s_filiacao>": 57535,
|
| 18 |
"<s_iitcdip>": 57523,
|
|
|
|
| 12 |
"<s_birth_date>": 57531,
|
| 13 |
"<s_cnh>": 57541,
|
| 14 |
"<s_cpf>": 57533,
|
|
|
|
| 15 |
"<s_expedition_date>": 57539,
|
| 16 |
"<s_filiacao>": 57535,
|
| 17 |
"<s_iitcdip>": 57523,
|
special_tokens_map.json
CHANGED
|
@@ -141,14 +141,14 @@
|
|
| 141 |
"single_word": false
|
| 142 |
},
|
| 143 |
{
|
| 144 |
-
"content": "<
|
| 145 |
"lstrip": false,
|
| 146 |
"normalized": false,
|
| 147 |
"rstrip": false,
|
| 148 |
"single_word": false
|
| 149 |
},
|
| 150 |
{
|
| 151 |
-
"content": "<
|
| 152 |
"lstrip": false,
|
| 153 |
"normalized": false,
|
| 154 |
"rstrip": false,
|
|
|
|
| 141 |
"single_word": false
|
| 142 |
},
|
| 143 |
{
|
| 144 |
+
"content": "<s>",
|
| 145 |
"lstrip": false,
|
| 146 |
"normalized": false,
|
| 147 |
"rstrip": false,
|
| 148 |
"single_word": false
|
| 149 |
},
|
| 150 |
{
|
| 151 |
+
"content": "<s>",
|
| 152 |
"lstrip": false,
|
| 153 |
"normalized": false,
|
| 154 |
"rstrip": false,
|
tokenizer.json
CHANGED
|
@@ -254,15 +254,6 @@
|
|
| 254 |
"rstrip": false,
|
| 255 |
"normalized": false,
|
| 256 |
"special": true
|
| 257 |
-
},
|
| 258 |
-
{
|
| 259 |
-
"id": 57545,
|
| 260 |
-
"content": "<s_docparse>",
|
| 261 |
-
"single_word": false,
|
| 262 |
-
"lstrip": false,
|
| 263 |
-
"rstrip": false,
|
| 264 |
-
"normalized": false,
|
| 265 |
-
"special": true
|
| 266 |
}
|
| 267 |
],
|
| 268 |
"normalizer": {
|
|
|
|
| 254 |
"rstrip": false,
|
| 255 |
"normalized": false,
|
| 256 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
}
|
| 258 |
],
|
| 259 |
"normalizer": {
|
tokenizer_config.json
CHANGED
|
@@ -223,14 +223,6 @@
|
|
| 223 |
"rstrip": false,
|
| 224 |
"single_word": false,
|
| 225 |
"special": true
|
| 226 |
-
},
|
| 227 |
-
"57545": {
|
| 228 |
-
"content": "<s_docparse>",
|
| 229 |
-
"lstrip": false,
|
| 230 |
-
"normalized": false,
|
| 231 |
-
"rstrip": false,
|
| 232 |
-
"single_word": false,
|
| 233 |
-
"special": true
|
| 234 |
}
|
| 235 |
},
|
| 236 |
"additional_special_tokens": [
|
|
@@ -254,8 +246,8 @@
|
|
| 254 |
"</s_cnh>",
|
| 255 |
"<s_naturality>",
|
| 256 |
"</s_naturality>",
|
| 257 |
-
"<
|
| 258 |
-
"<
|
| 259 |
],
|
| 260 |
"bos_token": "<s>",
|
| 261 |
"clean_up_tokenization_spaces": true,
|
|
|
|
| 223 |
"rstrip": false,
|
| 224 |
"single_word": false,
|
| 225 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
}
|
| 227 |
},
|
| 228 |
"additional_special_tokens": [
|
|
|
|
| 246 |
"</s_cnh>",
|
| 247 |
"<s_naturality>",
|
| 248 |
"</s_naturality>",
|
| 249 |
+
"<s>",
|
| 250 |
+
"<s>"
|
| 251 |
],
|
| 252 |
"bos_token": "<s>",
|
| 253 |
"clean_up_tokenization_spaces": true,
|