erickcrus committed on
Commit
5772da9
·
verified ·
1 Parent(s): 1821c53

Training done

Browse files
added_tokens.json CHANGED
@@ -12,7 +12,6 @@
12
  "<s_birth_date>": 57531,
13
  "<s_cnh>": 57541,
14
  "<s_cpf>": 57533,
15
- "<s_docparse>": 57545,
16
  "<s_expedition_date>": 57539,
17
  "<s_filiacao>": 57535,
18
  "<s_iitcdip>": 57523,
 
12
  "<s_birth_date>": 57531,
13
  "<s_cnh>": 57541,
14
  "<s_cpf>": 57533,
 
15
  "<s_expedition_date>": 57539,
16
  "<s_filiacao>": 57535,
17
  "<s_iitcdip>": 57523,
special_tokens_map.json CHANGED
@@ -141,14 +141,14 @@
141
  "single_word": false
142
  },
143
  {
144
- "content": "<s_docparse>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
148
  "single_word": false
149
  },
150
  {
151
- "content": "<s_docparse>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
 
141
  "single_word": false
142
  },
143
  {
144
+ "content": "<s>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
148
  "single_word": false
149
  },
150
  {
151
+ "content": "<s>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
tokenizer.json CHANGED
@@ -254,15 +254,6 @@
254
  "rstrip": false,
255
  "normalized": false,
256
  "special": true
257
- },
258
- {
259
- "id": 57545,
260
- "content": "<s_docparse>",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  }
267
  ],
268
  "normalizer": {
 
254
  "rstrip": false,
255
  "normalized": false,
256
  "special": true
 
 
 
 
 
 
 
 
 
257
  }
258
  ],
259
  "normalizer": {
tokenizer_config.json CHANGED
@@ -223,14 +223,6 @@
223
  "rstrip": false,
224
  "single_word": false,
225
  "special": true
226
- },
227
- "57545": {
228
- "content": "<s_docparse>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": true
234
  }
235
  },
236
  "additional_special_tokens": [
@@ -254,8 +246,8 @@
254
  "</s_cnh>",
255
  "<s_naturality>",
256
  "</s_naturality>",
257
- "<s_docparse>",
258
- "<s_docparse>"
259
  ],
260
  "bos_token": "<s>",
261
  "clean_up_tokenization_spaces": true,
 
223
  "rstrip": false,
224
  "single_word": false,
225
  "special": true
 
 
 
 
 
 
 
 
226
  }
227
  },
228
  "additional_special_tokens": [
 
246
  "</s_cnh>",
247
  "<s_naturality>",
248
  "</s_naturality>",
249
+ "<s>",
250
+ "<s>"
251
  ],
252
  "bos_token": "<s>",
253
  "clean_up_tokenization_spaces": true,