GitInvoiceCzechTrainer / tokenizer_config.json
TomasFAV's picture
Upload processor
1ff2033 verified
{
"added_tokens_decoder": {
"0": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"101": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"102": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"103": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30522": {
"content": "</s_due_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30523": {
"content": "<s_customer_tax_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30524": {
"content": "</s_customer_tax_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30525": {
"content": "</s_issue_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30526": {
"content": "</s_taxable_supply_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30527": {
"content": "<s_variable_symbol>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30528": {
"content": "</s_total_vat>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30529": {
"content": "</s_supplier_tax_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30530": {
"content": "</s_payment>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30531": {
"content": "<s_total_price>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30532": {
"content": "<s_taxable_supply_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30533": {
"content": "</s_variable_symbol>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30534": {
"content": "<s_supplier_register_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30535": {
"content": "<s_total_vat>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30536": {
"content": "<s_BIC>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30537": {
"content": "</s_supplier_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30538": {
"content": "</s_supplier_register_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30539": {
"content": "<s_supplier_tax_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30540": {
"content": "<s_IBAN>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30541": {
"content": "</s_currency>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30542": {
"content": "<s_due_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30543": {
"content": "</s_invoice_number>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30544": {
"content": "<s_customer_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30545": {
"content": "</s_IBAN>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30546": {
"content": "<s_payment>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30547": {
"content": "<s_supplier_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30548": {
"content": "<s_issue_date>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30549": {
"content": "<s_const_symbol>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30550": {
"content": "</s_const_symbol>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30551": {
"content": "<s_bank_account_number>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30552": {
"content": "<s_customer_register_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30553": {
"content": "</s_customer_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30554": {
"content": "</s_BIC>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30555": {
"content": "</s_total_price>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30556": {
"content": "</s_customer_register_id>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30557": {
"content": "</s_bank_account_number>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30558": {
"content": "<s_invoice_number>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30559": {
"content": "<s_currency>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30560": {
"content": "<reserved_1>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30561": {
"content": "<reserved_2>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30562": {
"content": "<reserved_3>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30563": {
"content": "<reserved_4>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30564": {
"content": "<reserved_5>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30565": {
"content": "<reserved_6>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30566": {
"content": "<reserved_7>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30567": {
"content": "<reserved_8>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30568": {
"content": "<reserved_9>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30569": {
"content": "<reserved_10>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30570": {
"content": "<reserved_11>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30571": {
"content": "<reserved_12>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30572": {
"content": "<reserved_13>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30573": {
"content": "<reserved_14>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30574": {
"content": "<reserved_15>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30575": {
"content": "<reserved_16>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30576": {
"content": "<reserved_17>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30577": {
"content": "<reserved_18>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30578": {
"content": "<reserved_19>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30579": {
"content": "<reserved_20>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30580": {
"content": "<reserved_21>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30581": {
"content": "<reserved_22>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30582": {
"content": "<reserved_23>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30583": {
"content": "<reserved_24>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30584": {
"content": "<reserved_25>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30585": {
"content": "<reserved_26>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30586": {
"content": "<reserved_27>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30587": {
"content": "<reserved_28>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30588": {
"content": "<reserved_29>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30589": {
"content": "<reserved_30>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30590": {
"content": "<reserved_31>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30591": {
"content": "<reserved_32>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"</s_due_date>",
"<s_customer_tax_id>",
"</s_customer_tax_id>",
"</s_issue_date>",
"</s_taxable_supply_date>",
"<s_variable_symbol>",
"</s_total_vat>",
"</s_supplier_tax_id>",
"</s_payment>",
"<s_total_price>",
"<s_taxable_supply_date>",
"</s_variable_symbol>",
"<s_supplier_register_id>",
"<s_total_vat>",
"<s_BIC>",
"</s_supplier_name>",
"</s_supplier_register_id>",
"<s_supplier_tax_id>",
"<s_IBAN>",
"</s_currency>",
"<s_due_date>",
"</s_invoice_number>",
"<s_customer_name>",
"</s_IBAN>",
"<s_payment>",
"<s_supplier_name>",
"<s_issue_date>",
"<s_const_symbol>",
"</s_const_symbol>",
"<s_bank_account_number>",
"<s_customer_register_id>",
"</s_customer_name>",
"</s_BIC>",
"</s_total_price>",
"</s_customer_register_id>",
"</s_bank_account_number>",
"<s_invoice_number>",
"<s_currency>"
],
"clean_up_tokenization_spaces": false,
"cls_token": "[CLS]",
"do_lower_case": true,
"extra_special_tokens": {},
"mask_token": "[MASK]",
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 512,
"pad_token": "[PAD]",
"processor_class": "GitProcessor",
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[UNK]"
}