Upload tokenizer
Browse files
- added_tokens.json +1 -10
- tokenizer.json +0 -0
- tokenizer_config.json +10 -78
added_tokens.json
CHANGED
|
@@ -1,12 +1,3 @@
|
|
| 1 |
{
|
| 2 |
-
"\t": 32109,
|
| 3 |
-
"\n": 32103,
|
| 4 |
-
" ": 32106,
|
| 5 |
-
"<": 32101,
|
| 6 |
-
"[PAD]": 32100,
|
| 7 |
-
"\\": 32107,
|
| 8 |
-
"^": 32108,
|
| 9 |
-
"`": 32105,
|
| 10 |
-
"{": 32102,
|
| 11 |
-
"}": 32104
|
| 12 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"[PAD]": 32100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -1,6 +1,13 @@
|
|
| 1 |
{
|
| 2 |
-
"add_prefix_space": true,
|
| 3 |
"added_tokens_decoder": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"1": {
|
| 5 |
"content": "</s>",
|
| 6 |
"lstrip": false,
|
|
@@ -824,78 +831,6 @@
|
|
| 824 |
"rstrip": false,
|
| 825 |
"single_word": false,
|
| 826 |
"special": true
|
| 827 |
-
},
|
| 828 |
-
"32101": {
|
| 829 |
-
"content": "<",
|
| 830 |
-
"lstrip": false,
|
| 831 |
-
"normalized": true,
|
| 832 |
-
"rstrip": false,
|
| 833 |
-
"single_word": false,
|
| 834 |
-
"special": false
|
| 835 |
-
},
|
| 836 |
-
"32102": {
|
| 837 |
-
"content": "{",
|
| 838 |
-
"lstrip": false,
|
| 839 |
-
"normalized": true,
|
| 840 |
-
"rstrip": false,
|
| 841 |
-
"single_word": false,
|
| 842 |
-
"special": false
|
| 843 |
-
},
|
| 844 |
-
"32103": {
|
| 845 |
-
"content": "\n",
|
| 846 |
-
"lstrip": false,
|
| 847 |
-
"normalized": true,
|
| 848 |
-
"rstrip": false,
|
| 849 |
-
"single_word": false,
|
| 850 |
-
"special": false
|
| 851 |
-
},
|
| 852 |
-
"32104": {
|
| 853 |
-
"content": "}",
|
| 854 |
-
"lstrip": false,
|
| 855 |
-
"normalized": true,
|
| 856 |
-
"rstrip": false,
|
| 857 |
-
"single_word": false,
|
| 858 |
-
"special": false
|
| 859 |
-
},
|
| 860 |
-
"32105": {
|
| 861 |
-
"content": "`",
|
| 862 |
-
"lstrip": false,
|
| 863 |
-
"normalized": true,
|
| 864 |
-
"rstrip": false,
|
| 865 |
-
"single_word": false,
|
| 866 |
-
"special": false
|
| 867 |
-
},
|
| 868 |
-
"32106": {
|
| 869 |
-
"content": " ",
|
| 870 |
-
"lstrip": false,
|
| 871 |
-
"normalized": true,
|
| 872 |
-
"rstrip": false,
|
| 873 |
-
"single_word": false,
|
| 874 |
-
"special": false
|
| 875 |
-
},
|
| 876 |
-
"32107": {
|
| 877 |
-
"content": "\\",
|
| 878 |
-
"lstrip": false,
|
| 879 |
-
"normalized": true,
|
| 880 |
-
"rstrip": false,
|
| 881 |
-
"single_word": false,
|
| 882 |
-
"special": false
|
| 883 |
-
},
|
| 884 |
-
"32108": {
|
| 885 |
-
"content": "^",
|
| 886 |
-
"lstrip": false,
|
| 887 |
-
"normalized": true,
|
| 888 |
-
"rstrip": false,
|
| 889 |
-
"single_word": false,
|
| 890 |
-
"special": false
|
| 891 |
-
},
|
| 892 |
-
"32109": {
|
| 893 |
-
"content": "\t",
|
| 894 |
-
"lstrip": false,
|
| 895 |
-
"normalized": true,
|
| 896 |
-
"rstrip": false,
|
| 897 |
-
"single_word": false,
|
| 898 |
-
"special": false
|
| 899 |
}
|
| 900 |
},
|
| 901 |
"additional_special_tokens": [
|
|
@@ -1003,12 +938,9 @@
|
|
| 1003 |
"clean_up_tokenization_spaces": true,
|
| 1004 |
"eos_token": "</s>",
|
| 1005 |
"extra_ids": 100,
|
| 1006 |
-
"
|
| 1007 |
-
"model_max_length": 2048,
|
| 1008 |
"pad_token": "[PAD]",
|
| 1009 |
-
"padding_side": "right",
|
| 1010 |
"sp_model_kwargs": {},
|
| 1011 |
"tokenizer_class": "T5Tokenizer",
|
| 1012 |
-
"unk_token": "<unk>",
|
| 1013 |
-
"use_fast": false
|
| 1014 |
}
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
"1": {
|
| 12 |
"content": "</s>",
|
| 13 |
"lstrip": false,
|
|
|
|
| 831 |
"rstrip": false,
|
| 832 |
"single_word": false,
|
| 833 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
}
|
| 835 |
},
|
| 836 |
"additional_special_tokens": [
|
|
|
|
| 938 |
"clean_up_tokenization_spaces": true,
|
| 939 |
"eos_token": "</s>",
|
| 940 |
"extra_ids": 100,
|
| 941 |
+
"model_max_length": 512,
|
|
|
|
| 942 |
"pad_token": "[PAD]",
|
|
|
|
| 943 |
"sp_model_kwargs": {},
|
| 944 |
"tokenizer_class": "T5Tokenizer",
|
| 945 |
+
"unk_token": "<unk>"
|
|
|
|
| 946 |
}
|