Upload tokenizer
Browse files- added_tokens.json +210 -0
- special_tokens_map.json +221 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +23 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<pad>": 30002,
|
| 3 |
+
"[CLS]": 30000,
|
| 4 |
+
"[MASK]": 30003,
|
| 5 |
+
"[SEP]": 30001,
|
| 6 |
+
"[XXXXX0]": 30004,
|
| 7 |
+
"[XXXXX100]": 30104,
|
| 8 |
+
"[XXXXX101]": 30105,
|
| 9 |
+
"[XXXXX102]": 30106,
|
| 10 |
+
"[XXXXX103]": 30107,
|
| 11 |
+
"[XXXXX104]": 30108,
|
| 12 |
+
"[XXXXX105]": 30109,
|
| 13 |
+
"[XXXXX106]": 30110,
|
| 14 |
+
"[XXXXX107]": 30111,
|
| 15 |
+
"[XXXXX108]": 30112,
|
| 16 |
+
"[XXXXX109]": 30113,
|
| 17 |
+
"[XXXXX10]": 30014,
|
| 18 |
+
"[XXXXX110]": 30114,
|
| 19 |
+
"[XXXXX111]": 30115,
|
| 20 |
+
"[XXXXX112]": 30116,
|
| 21 |
+
"[XXXXX113]": 30117,
|
| 22 |
+
"[XXXXX114]": 30118,
|
| 23 |
+
"[XXXXX115]": 30119,
|
| 24 |
+
"[XXXXX116]": 30120,
|
| 25 |
+
"[XXXXX117]": 30121,
|
| 26 |
+
"[XXXXX118]": 30122,
|
| 27 |
+
"[XXXXX119]": 30123,
|
| 28 |
+
"[XXXXX11]": 30015,
|
| 29 |
+
"[XXXXX120]": 30124,
|
| 30 |
+
"[XXXXX121]": 30125,
|
| 31 |
+
"[XXXXX122]": 30126,
|
| 32 |
+
"[XXXXX123]": 30127,
|
| 33 |
+
"[XXXXX124]": 30128,
|
| 34 |
+
"[XXXXX125]": 30129,
|
| 35 |
+
"[XXXXX126]": 30130,
|
| 36 |
+
"[XXXXX127]": 30131,
|
| 37 |
+
"[XXXXX128]": 30132,
|
| 38 |
+
"[XXXXX129]": 30133,
|
| 39 |
+
"[XXXXX12]": 30016,
|
| 40 |
+
"[XXXXX130]": 30134,
|
| 41 |
+
"[XXXXX131]": 30135,
|
| 42 |
+
"[XXXXX132]": 30136,
|
| 43 |
+
"[XXXXX133]": 30137,
|
| 44 |
+
"[XXXXX134]": 30138,
|
| 45 |
+
"[XXXXX135]": 30139,
|
| 46 |
+
"[XXXXX136]": 30140,
|
| 47 |
+
"[XXXXX137]": 30141,
|
| 48 |
+
"[XXXXX138]": 30142,
|
| 49 |
+
"[XXXXX139]": 30143,
|
| 50 |
+
"[XXXXX13]": 30017,
|
| 51 |
+
"[XXXXX140]": 30144,
|
| 52 |
+
"[XXXXX141]": 30145,
|
| 53 |
+
"[XXXXX142]": 30146,
|
| 54 |
+
"[XXXXX143]": 30147,
|
| 55 |
+
"[XXXXX144]": 30148,
|
| 56 |
+
"[XXXXX145]": 30149,
|
| 57 |
+
"[XXXXX146]": 30150,
|
| 58 |
+
"[XXXXX147]": 30151,
|
| 59 |
+
"[XXXXX148]": 30152,
|
| 60 |
+
"[XXXXX149]": 30153,
|
| 61 |
+
"[XXXXX14]": 30018,
|
| 62 |
+
"[XXXXX150]": 30154,
|
| 63 |
+
"[XXXXX151]": 30155,
|
| 64 |
+
"[XXXXX152]": 30156,
|
| 65 |
+
"[XXXXX153]": 30157,
|
| 66 |
+
"[XXXXX154]": 30158,
|
| 67 |
+
"[XXXXX155]": 30159,
|
| 68 |
+
"[XXXXX156]": 30160,
|
| 69 |
+
"[XXXXX157]": 30161,
|
| 70 |
+
"[XXXXX158]": 30162,
|
| 71 |
+
"[XXXXX159]": 30163,
|
| 72 |
+
"[XXXXX15]": 30019,
|
| 73 |
+
"[XXXXX160]": 30164,
|
| 74 |
+
"[XXXXX161]": 30165,
|
| 75 |
+
"[XXXXX162]": 30166,
|
| 76 |
+
"[XXXXX163]": 30167,
|
| 77 |
+
"[XXXXX164]": 30168,
|
| 78 |
+
"[XXXXX165]": 30169,
|
| 79 |
+
"[XXXXX166]": 30170,
|
| 80 |
+
"[XXXXX167]": 30171,
|
| 81 |
+
"[XXXXX168]": 30172,
|
| 82 |
+
"[XXXXX169]": 30173,
|
| 83 |
+
"[XXXXX16]": 30020,
|
| 84 |
+
"[XXXXX170]": 30174,
|
| 85 |
+
"[XXXXX171]": 30175,
|
| 86 |
+
"[XXXXX172]": 30176,
|
| 87 |
+
"[XXXXX173]": 30177,
|
| 88 |
+
"[XXXXX174]": 30178,
|
| 89 |
+
"[XXXXX175]": 30179,
|
| 90 |
+
"[XXXXX176]": 30180,
|
| 91 |
+
"[XXXXX177]": 30181,
|
| 92 |
+
"[XXXXX178]": 30182,
|
| 93 |
+
"[XXXXX179]": 30183,
|
| 94 |
+
"[XXXXX17]": 30021,
|
| 95 |
+
"[XXXXX180]": 30184,
|
| 96 |
+
"[XXXXX181]": 30185,
|
| 97 |
+
"[XXXXX182]": 30186,
|
| 98 |
+
"[XXXXX183]": 30187,
|
| 99 |
+
"[XXXXX184]": 30188,
|
| 100 |
+
"[XXXXX185]": 30189,
|
| 101 |
+
"[XXXXX186]": 30190,
|
| 102 |
+
"[XXXXX187]": 30191,
|
| 103 |
+
"[XXXXX188]": 30192,
|
| 104 |
+
"[XXXXX189]": 30193,
|
| 105 |
+
"[XXXXX18]": 30022,
|
| 106 |
+
"[XXXXX190]": 30194,
|
| 107 |
+
"[XXXXX191]": 30195,
|
| 108 |
+
"[XXXXX192]": 30196,
|
| 109 |
+
"[XXXXX193]": 30197,
|
| 110 |
+
"[XXXXX194]": 30198,
|
| 111 |
+
"[XXXXX195]": 30199,
|
| 112 |
+
"[XXXXX196]": 30200,
|
| 113 |
+
"[XXXXX197]": 30201,
|
| 114 |
+
"[XXXXX198]": 30202,
|
| 115 |
+
"[XXXXX199]": 30203,
|
| 116 |
+
"[XXXXX19]": 30023,
|
| 117 |
+
"[XXXXX1]": 30005,
|
| 118 |
+
"[XXXXX200]": 30204,
|
| 119 |
+
"[XXXXX201]": 30205,
|
| 120 |
+
"[XXXXX202]": 30206,
|
| 121 |
+
"[XXXXX203]": 30207,
|
| 122 |
+
"[XXXXX20]": 30024,
|
| 123 |
+
"[XXXXX21]": 30025,
|
| 124 |
+
"[XXXXX22]": 30026,
|
| 125 |
+
"[XXXXX23]": 30027,
|
| 126 |
+
"[XXXXX24]": 30028,
|
| 127 |
+
"[XXXXX25]": 30029,
|
| 128 |
+
"[XXXXX26]": 30030,
|
| 129 |
+
"[XXXXX27]": 30031,
|
| 130 |
+
"[XXXXX28]": 30032,
|
| 131 |
+
"[XXXXX29]": 30033,
|
| 132 |
+
"[XXXXX2]": 30006,
|
| 133 |
+
"[XXXXX30]": 30034,
|
| 134 |
+
"[XXXXX31]": 30035,
|
| 135 |
+
"[XXXXX32]": 30036,
|
| 136 |
+
"[XXXXX33]": 30037,
|
| 137 |
+
"[XXXXX34]": 30038,
|
| 138 |
+
"[XXXXX35]": 30039,
|
| 139 |
+
"[XXXXX36]": 30040,
|
| 140 |
+
"[XXXXX37]": 30041,
|
| 141 |
+
"[XXXXX38]": 30042,
|
| 142 |
+
"[XXXXX39]": 30043,
|
| 143 |
+
"[XXXXX3]": 30007,
|
| 144 |
+
"[XXXXX40]": 30044,
|
| 145 |
+
"[XXXXX41]": 30045,
|
| 146 |
+
"[XXXXX42]": 30046,
|
| 147 |
+
"[XXXXX43]": 30047,
|
| 148 |
+
"[XXXXX44]": 30048,
|
| 149 |
+
"[XXXXX45]": 30049,
|
| 150 |
+
"[XXXXX46]": 30050,
|
| 151 |
+
"[XXXXX47]": 30051,
|
| 152 |
+
"[XXXXX48]": 30052,
|
| 153 |
+
"[XXXXX49]": 30053,
|
| 154 |
+
"[XXXXX4]": 30008,
|
| 155 |
+
"[XXXXX50]": 30054,
|
| 156 |
+
"[XXXXX51]": 30055,
|
| 157 |
+
"[XXXXX52]": 30056,
|
| 158 |
+
"[XXXXX53]": 30057,
|
| 159 |
+
"[XXXXX54]": 30058,
|
| 160 |
+
"[XXXXX55]": 30059,
|
| 161 |
+
"[XXXXX56]": 30060,
|
| 162 |
+
"[XXXXX57]": 30061,
|
| 163 |
+
"[XXXXX58]": 30062,
|
| 164 |
+
"[XXXXX59]": 30063,
|
| 165 |
+
"[XXXXX5]": 30009,
|
| 166 |
+
"[XXXXX60]": 30064,
|
| 167 |
+
"[XXXXX61]": 30065,
|
| 168 |
+
"[XXXXX62]": 30066,
|
| 169 |
+
"[XXXXX63]": 30067,
|
| 170 |
+
"[XXXXX64]": 30068,
|
| 171 |
+
"[XXXXX65]": 30069,
|
| 172 |
+
"[XXXXX66]": 30070,
|
| 173 |
+
"[XXXXX67]": 30071,
|
| 174 |
+
"[XXXXX68]": 30072,
|
| 175 |
+
"[XXXXX69]": 30073,
|
| 176 |
+
"[XXXXX6]": 30010,
|
| 177 |
+
"[XXXXX70]": 30074,
|
| 178 |
+
"[XXXXX71]": 30075,
|
| 179 |
+
"[XXXXX72]": 30076,
|
| 180 |
+
"[XXXXX73]": 30077,
|
| 181 |
+
"[XXXXX74]": 30078,
|
| 182 |
+
"[XXXXX75]": 30079,
|
| 183 |
+
"[XXXXX76]": 30080,
|
| 184 |
+
"[XXXXX77]": 30081,
|
| 185 |
+
"[XXXXX78]": 30082,
|
| 186 |
+
"[XXXXX79]": 30083,
|
| 187 |
+
"[XXXXX7]": 30011,
|
| 188 |
+
"[XXXXX80]": 30084,
|
| 189 |
+
"[XXXXX81]": 30085,
|
| 190 |
+
"[XXXXX82]": 30086,
|
| 191 |
+
"[XXXXX83]": 30087,
|
| 192 |
+
"[XXXXX84]": 30088,
|
| 193 |
+
"[XXXXX85]": 30089,
|
| 194 |
+
"[XXXXX86]": 30090,
|
| 195 |
+
"[XXXXX87]": 30091,
|
| 196 |
+
"[XXXXX88]": 30092,
|
| 197 |
+
"[XXXXX89]": 30093,
|
| 198 |
+
"[XXXXX8]": 30012,
|
| 199 |
+
"[XXXXX90]": 30094,
|
| 200 |
+
"[XXXXX91]": 30095,
|
| 201 |
+
"[XXXXX92]": 30096,
|
| 202 |
+
"[XXXXX93]": 30097,
|
| 203 |
+
"[XXXXX94]": 30098,
|
| 204 |
+
"[XXXXX95]": 30099,
|
| 205 |
+
"[XXXXX96]": 30100,
|
| 206 |
+
"[XXXXX97]": 30101,
|
| 207 |
+
"[XXXXX98]": 30102,
|
| 208 |
+
"[XXXXX99]": 30103,
|
| 209 |
+
"[XXXXX9]": 30013
|
| 210 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"[XXXXX0]",
|
| 4 |
+
"[XXXXX1]",
|
| 5 |
+
"[XXXXX2]",
|
| 6 |
+
"[XXXXX3]",
|
| 7 |
+
"[XXXXX4]",
|
| 8 |
+
"[XXXXX5]",
|
| 9 |
+
"[XXXXX6]",
|
| 10 |
+
"[XXXXX7]",
|
| 11 |
+
"[XXXXX8]",
|
| 12 |
+
"[XXXXX9]",
|
| 13 |
+
"[XXXXX10]",
|
| 14 |
+
"[XXXXX11]",
|
| 15 |
+
"[XXXXX12]",
|
| 16 |
+
"[XXXXX13]",
|
| 17 |
+
"[XXXXX14]",
|
| 18 |
+
"[XXXXX15]",
|
| 19 |
+
"[XXXXX16]",
|
| 20 |
+
"[XXXXX17]",
|
| 21 |
+
"[XXXXX18]",
|
| 22 |
+
"[XXXXX19]",
|
| 23 |
+
"[XXXXX20]",
|
| 24 |
+
"[XXXXX21]",
|
| 25 |
+
"[XXXXX22]",
|
| 26 |
+
"[XXXXX23]",
|
| 27 |
+
"[XXXXX24]",
|
| 28 |
+
"[XXXXX25]",
|
| 29 |
+
"[XXXXX26]",
|
| 30 |
+
"[XXXXX27]",
|
| 31 |
+
"[XXXXX28]",
|
| 32 |
+
"[XXXXX29]",
|
| 33 |
+
"[XXXXX30]",
|
| 34 |
+
"[XXXXX31]",
|
| 35 |
+
"[XXXXX32]",
|
| 36 |
+
"[XXXXX33]",
|
| 37 |
+
"[XXXXX34]",
|
| 38 |
+
"[XXXXX35]",
|
| 39 |
+
"[XXXXX36]",
|
| 40 |
+
"[XXXXX37]",
|
| 41 |
+
"[XXXXX38]",
|
| 42 |
+
"[XXXXX39]",
|
| 43 |
+
"[XXXXX40]",
|
| 44 |
+
"[XXXXX41]",
|
| 45 |
+
"[XXXXX42]",
|
| 46 |
+
"[XXXXX43]",
|
| 47 |
+
"[XXXXX44]",
|
| 48 |
+
"[XXXXX45]",
|
| 49 |
+
"[XXXXX46]",
|
| 50 |
+
"[XXXXX47]",
|
| 51 |
+
"[XXXXX48]",
|
| 52 |
+
"[XXXXX49]",
|
| 53 |
+
"[XXXXX50]",
|
| 54 |
+
"[XXXXX51]",
|
| 55 |
+
"[XXXXX52]",
|
| 56 |
+
"[XXXXX53]",
|
| 57 |
+
"[XXXXX54]",
|
| 58 |
+
"[XXXXX55]",
|
| 59 |
+
"[XXXXX56]",
|
| 60 |
+
"[XXXXX57]",
|
| 61 |
+
"[XXXXX58]",
|
| 62 |
+
"[XXXXX59]",
|
| 63 |
+
"[XXXXX60]",
|
| 64 |
+
"[XXXXX61]",
|
| 65 |
+
"[XXXXX62]",
|
| 66 |
+
"[XXXXX63]",
|
| 67 |
+
"[XXXXX64]",
|
| 68 |
+
"[XXXXX65]",
|
| 69 |
+
"[XXXXX66]",
|
| 70 |
+
"[XXXXX67]",
|
| 71 |
+
"[XXXXX68]",
|
| 72 |
+
"[XXXXX69]",
|
| 73 |
+
"[XXXXX70]",
|
| 74 |
+
"[XXXXX71]",
|
| 75 |
+
"[XXXXX72]",
|
| 76 |
+
"[XXXXX73]",
|
| 77 |
+
"[XXXXX74]",
|
| 78 |
+
"[XXXXX75]",
|
| 79 |
+
"[XXXXX76]",
|
| 80 |
+
"[XXXXX77]",
|
| 81 |
+
"[XXXXX78]",
|
| 82 |
+
"[XXXXX79]",
|
| 83 |
+
"[XXXXX80]",
|
| 84 |
+
"[XXXXX81]",
|
| 85 |
+
"[XXXXX82]",
|
| 86 |
+
"[XXXXX83]",
|
| 87 |
+
"[XXXXX84]",
|
| 88 |
+
"[XXXXX85]",
|
| 89 |
+
"[XXXXX86]",
|
| 90 |
+
"[XXXXX87]",
|
| 91 |
+
"[XXXXX88]",
|
| 92 |
+
"[XXXXX89]",
|
| 93 |
+
"[XXXXX90]",
|
| 94 |
+
"[XXXXX91]",
|
| 95 |
+
"[XXXXX92]",
|
| 96 |
+
"[XXXXX93]",
|
| 97 |
+
"[XXXXX94]",
|
| 98 |
+
"[XXXXX95]",
|
| 99 |
+
"[XXXXX96]",
|
| 100 |
+
"[XXXXX97]",
|
| 101 |
+
"[XXXXX98]",
|
| 102 |
+
"[XXXXX99]",
|
| 103 |
+
"[XXXXX100]",
|
| 104 |
+
"[XXXXX101]",
|
| 105 |
+
"[XXXXX102]",
|
| 106 |
+
"[XXXXX103]",
|
| 107 |
+
"[XXXXX104]",
|
| 108 |
+
"[XXXXX105]",
|
| 109 |
+
"[XXXXX106]",
|
| 110 |
+
"[XXXXX107]",
|
| 111 |
+
"[XXXXX108]",
|
| 112 |
+
"[XXXXX109]",
|
| 113 |
+
"[XXXXX110]",
|
| 114 |
+
"[XXXXX111]",
|
| 115 |
+
"[XXXXX112]",
|
| 116 |
+
"[XXXXX113]",
|
| 117 |
+
"[XXXXX114]",
|
| 118 |
+
"[XXXXX115]",
|
| 119 |
+
"[XXXXX116]",
|
| 120 |
+
"[XXXXX117]",
|
| 121 |
+
"[XXXXX118]",
|
| 122 |
+
"[XXXXX119]",
|
| 123 |
+
"[XXXXX120]",
|
| 124 |
+
"[XXXXX121]",
|
| 125 |
+
"[XXXXX122]",
|
| 126 |
+
"[XXXXX123]",
|
| 127 |
+
"[XXXXX124]",
|
| 128 |
+
"[XXXXX125]",
|
| 129 |
+
"[XXXXX126]",
|
| 130 |
+
"[XXXXX127]",
|
| 131 |
+
"[XXXXX128]",
|
| 132 |
+
"[XXXXX129]",
|
| 133 |
+
"[XXXXX130]",
|
| 134 |
+
"[XXXXX131]",
|
| 135 |
+
"[XXXXX132]",
|
| 136 |
+
"[XXXXX133]",
|
| 137 |
+
"[XXXXX134]",
|
| 138 |
+
"[XXXXX135]",
|
| 139 |
+
"[XXXXX136]",
|
| 140 |
+
"[XXXXX137]",
|
| 141 |
+
"[XXXXX138]",
|
| 142 |
+
"[XXXXX139]",
|
| 143 |
+
"[XXXXX140]",
|
| 144 |
+
"[XXXXX141]",
|
| 145 |
+
"[XXXXX142]",
|
| 146 |
+
"[XXXXX143]",
|
| 147 |
+
"[XXXXX144]",
|
| 148 |
+
"[XXXXX145]",
|
| 149 |
+
"[XXXXX146]",
|
| 150 |
+
"[XXXXX147]",
|
| 151 |
+
"[XXXXX148]",
|
| 152 |
+
"[XXXXX149]",
|
| 153 |
+
"[XXXXX150]",
|
| 154 |
+
"[XXXXX151]",
|
| 155 |
+
"[XXXXX152]",
|
| 156 |
+
"[XXXXX153]",
|
| 157 |
+
"[XXXXX154]",
|
| 158 |
+
"[XXXXX155]",
|
| 159 |
+
"[XXXXX156]",
|
| 160 |
+
"[XXXXX157]",
|
| 161 |
+
"[XXXXX158]",
|
| 162 |
+
"[XXXXX159]",
|
| 163 |
+
"[XXXXX160]",
|
| 164 |
+
"[XXXXX161]",
|
| 165 |
+
"[XXXXX162]",
|
| 166 |
+
"[XXXXX163]",
|
| 167 |
+
"[XXXXX164]",
|
| 168 |
+
"[XXXXX165]",
|
| 169 |
+
"[XXXXX166]",
|
| 170 |
+
"[XXXXX167]",
|
| 171 |
+
"[XXXXX168]",
|
| 172 |
+
"[XXXXX169]",
|
| 173 |
+
"[XXXXX170]",
|
| 174 |
+
"[XXXXX171]",
|
| 175 |
+
"[XXXXX172]",
|
| 176 |
+
"[XXXXX173]",
|
| 177 |
+
"[XXXXX174]",
|
| 178 |
+
"[XXXXX175]",
|
| 179 |
+
"[XXXXX176]",
|
| 180 |
+
"[XXXXX177]",
|
| 181 |
+
"[XXXXX178]",
|
| 182 |
+
"[XXXXX179]",
|
| 183 |
+
"[XXXXX180]",
|
| 184 |
+
"[XXXXX181]",
|
| 185 |
+
"[XXXXX182]",
|
| 186 |
+
"[XXXXX183]",
|
| 187 |
+
"[XXXXX184]",
|
| 188 |
+
"[XXXXX185]",
|
| 189 |
+
"[XXXXX186]",
|
| 190 |
+
"[XXXXX187]",
|
| 191 |
+
"[XXXXX188]",
|
| 192 |
+
"[XXXXX189]",
|
| 193 |
+
"[XXXXX190]",
|
| 194 |
+
"[XXXXX191]",
|
| 195 |
+
"[XXXXX192]",
|
| 196 |
+
"[XXXXX193]",
|
| 197 |
+
"[XXXXX194]",
|
| 198 |
+
"[XXXXX195]",
|
| 199 |
+
"[XXXXX196]",
|
| 200 |
+
"[XXXXX197]",
|
| 201 |
+
"[XXXXX198]",
|
| 202 |
+
"[XXXXX199]",
|
| 203 |
+
"[XXXXX200]",
|
| 204 |
+
"[XXXXX201]",
|
| 205 |
+
"[XXXXX202]",
|
| 206 |
+
"[XXXXX203]"
|
| 207 |
+
],
|
| 208 |
+
"bos_token": "[CLS]",
|
| 209 |
+
"cls_token": "[CLS]",
|
| 210 |
+
"eos_token": "[SEP]",
|
| 211 |
+
"mask_token": {
|
| 212 |
+
"content": "[MASK]",
|
| 213 |
+
"lstrip": true,
|
| 214 |
+
"normalized": false,
|
| 215 |
+
"rstrip": false,
|
| 216 |
+
"single_word": false
|
| 217 |
+
},
|
| 218 |
+
"pad_token": "<pad>",
|
| 219 |
+
"sep_token": "[SEP]",
|
| 220 |
+
"unk_token": "<unk>"
|
| 221 |
+
}
|
spiece.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1840794a944b1ca5d804431a8fb26e42fa55680e0c845315adef099c24d322d5
|
| 3 |
+
size 756326
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "[CLS]",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"eos_token": "[SEP]",
|
| 7 |
+
"keep_accents": false,
|
| 8 |
+
"mask_token": {
|
| 9 |
+
"__type": "AddedToken",
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": true,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 17 |
+
"pad_token": "<pad>",
|
| 18 |
+
"remove_space": true,
|
| 19 |
+
"sep_token": "[SEP]",
|
| 20 |
+
"sp_model_kwargs": {},
|
| 21 |
+
"tokenizer_class": "AlbertTokenizer",
|
| 22 |
+
"unk_token": "<unk>"
|
| 23 |
+
}
|