add tokenizer
Browse files- special_tokens_map.json +1 -1
- tokenizer.json +0 -81
special_tokens_map.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"
|
|
|
|
| 1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"}
|
tokenizer.json
CHANGED
|
@@ -2063,87 +2063,6 @@
|
|
| 2063 |
"rstrip": false,
|
| 2064 |
"normalized": false,
|
| 2065 |
"special": true
|
| 2066 |
-
},
|
| 2067 |
-
{
|
| 2068 |
-
"id": 30000,
|
| 2069 |
-
"content": "<P01>",
|
| 2070 |
-
"single_word": false,
|
| 2071 |
-
"lstrip": false,
|
| 2072 |
-
"rstrip": false,
|
| 2073 |
-
"normalized": false,
|
| 2074 |
-
"special": true
|
| 2075 |
-
},
|
| 2076 |
-
{
|
| 2077 |
-
"id": 30001,
|
| 2078 |
-
"content": "<P02>",
|
| 2079 |
-
"single_word": false,
|
| 2080 |
-
"lstrip": false,
|
| 2081 |
-
"rstrip": false,
|
| 2082 |
-
"normalized": false,
|
| 2083 |
-
"special": true
|
| 2084 |
-
},
|
| 2085 |
-
{
|
| 2086 |
-
"id": 30002,
|
| 2087 |
-
"content": "<P03>",
|
| 2088 |
-
"single_word": false,
|
| 2089 |
-
"lstrip": false,
|
| 2090 |
-
"rstrip": false,
|
| 2091 |
-
"normalized": false,
|
| 2092 |
-
"special": true
|
| 2093 |
-
},
|
| 2094 |
-
{
|
| 2095 |
-
"id": 30003,
|
| 2096 |
-
"content": "<P04>",
|
| 2097 |
-
"single_word": false,
|
| 2098 |
-
"lstrip": false,
|
| 2099 |
-
"rstrip": false,
|
| 2100 |
-
"normalized": false,
|
| 2101 |
-
"special": true
|
| 2102 |
-
},
|
| 2103 |
-
{
|
| 2104 |
-
"id": 30004,
|
| 2105 |
-
"content": "<P05>",
|
| 2106 |
-
"single_word": false,
|
| 2107 |
-
"lstrip": false,
|
| 2108 |
-
"rstrip": false,
|
| 2109 |
-
"normalized": false,
|
| 2110 |
-
"special": true
|
| 2111 |
-
},
|
| 2112 |
-
{
|
| 2113 |
-
"id": 30005,
|
| 2114 |
-
"content": "<P06>",
|
| 2115 |
-
"single_word": false,
|
| 2116 |
-
"lstrip": false,
|
| 2117 |
-
"rstrip": false,
|
| 2118 |
-
"normalized": false,
|
| 2119 |
-
"special": true
|
| 2120 |
-
},
|
| 2121 |
-
{
|
| 2122 |
-
"id": 30006,
|
| 2123 |
-
"content": "<P07>",
|
| 2124 |
-
"single_word": false,
|
| 2125 |
-
"lstrip": false,
|
| 2126 |
-
"rstrip": false,
|
| 2127 |
-
"normalized": false,
|
| 2128 |
-
"special": true
|
| 2129 |
-
},
|
| 2130 |
-
{
|
| 2131 |
-
"id": 30007,
|
| 2132 |
-
"content": "<P08>",
|
| 2133 |
-
"single_word": false,
|
| 2134 |
-
"lstrip": false,
|
| 2135 |
-
"rstrip": false,
|
| 2136 |
-
"normalized": false,
|
| 2137 |
-
"special": true
|
| 2138 |
-
},
|
| 2139 |
-
{
|
| 2140 |
-
"id": 30008,
|
| 2141 |
-
"content": "<P09>",
|
| 2142 |
-
"single_word": false,
|
| 2143 |
-
"lstrip": false,
|
| 2144 |
-
"rstrip": false,
|
| 2145 |
-
"normalized": false,
|
| 2146 |
-
"special": true
|
| 2147 |
}
|
| 2148 |
],
|
| 2149 |
"normalizer": {
|
|
|
|
| 2063 |
"rstrip": false,
|
| 2064 |
"normalized": false,
|
| 2065 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2066 |
}
|
| 2067 |
],
|
| 2068 |
"normalizer": {
|