Commit
·
56646ff
1
Parent(s):
6a1aeb6
Upload tokenizer
Browse files- merges.txt +120 -62
- tokenizer.json +272 -161
- vocab.json +1 -1
merges.txt
CHANGED
|
@@ -1,94 +1,152 @@
|
|
| 1 |
#version: 0.2
|
|
|
|
|
|
|
|
|
|
| 2 |
i n
|
| 3 |
i s
|
| 4 |
-
|
| 5 |
Ġ o
|
|
|
|
| 6 |
Ġ is
|
| 7 |
a l
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
a n
|
| 13 |
-
a t
|
| 14 |
-
a y
|
| 15 |
-
a te
|
| 16 |
-
b s
|
| 17 |
b u
|
| 18 |
-
c h
|
| 19 |
-
c o
|
| 20 |
c t
|
| 21 |
-
d y
|
| 22 |
e d
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
e n
|
| 24 |
e s
|
|
|
|
| 25 |
e y
|
|
|
|
| 26 |
e ct
|
| 27 |
-
f
|
|
|
|
| 28 |
g s
|
| 29 |
-
g
|
| 30 |
-
|
| 31 |
-
h
|
| 32 |
-
i
|
| 33 |
i dy
|
| 34 |
i en
|
| 35 |
-
|
| 36 |
j ect
|
| 37 |
k ed
|
| 38 |
l in
|
| 39 |
-
m on
|
| 40 |
m an
|
| 41 |
-
n
|
| 42 |
-
n
|
| 43 |
-
o
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
r ien
|
| 47 |
-
r lin
|
| 48 |
s in
|
| 49 |
-
s
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
u e
|
| 52 |
-
|
| 53 |
v al
|
| 54 |
-
w
|
| 55 |
-
|
| 56 |
-
Ġ
|
| 57 |
Ġ bu
|
| 58 |
-
Ġ
|
| 59 |
-
Ġ
|
| 60 |
-
Ġ
|
| 61 |
-
Ġ
|
| 62 |
-
Ġ nat
|
| 63 |
-
Ġ pr
|
| 64 |
-
Ġ thin
|
| 65 |
Ġ val
|
| 66 |
-
Ġ
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
Ġo man
|
| 72 |
-
|
| 73 |
al s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
Su bs
|
| 75 |
-
es
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
| 89 |
Ġval ue
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
Subs idy
|
|
|
|
| 93 |
Ġinterlin ked
|
| 94 |
-
Ġbusinessgate ways
|
|
|
|
| 1 |
#version: 0.2
|
| 2 |
+
o r
|
| 3 |
+
e r
|
| 4 |
+
a t
|
| 5 |
i n
|
| 6 |
i s
|
| 7 |
+
o n
|
| 8 |
Ġ o
|
| 9 |
+
Ġ a
|
| 10 |
Ġ is
|
| 11 |
a l
|
| 12 |
+
f or
|
| 13 |
+
i on
|
| 14 |
+
r e
|
| 15 |
+
t r
|
| 16 |
+
Ġ c
|
| 17 |
+
Ġ m
|
| 18 |
+
Ġ s
|
| 19 |
+
Ġ for
|
| 20 |
+
at ion
|
| 21 |
+
Ġo f
|
| 22 |
a n
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
b u
|
|
|
|
|
|
|
| 24 |
c t
|
|
|
|
| 25 |
e d
|
| 26 |
+
i c
|
| 27 |
+
i er
|
| 28 |
+
l ier
|
| 29 |
+
p p
|
| 30 |
+
p r
|
| 31 |
+
p er
|
| 32 |
+
r s
|
| 33 |
+
s rs
|
| 34 |
+
t h
|
| 35 |
+
u pp
|
| 36 |
+
y s
|
| 37 |
+
Ġ j
|
| 38 |
+
Ġ th
|
| 39 |
+
or s
|
| 40 |
+
at ors
|
| 41 |
+
in t
|
| 42 |
+
Ġa re
|
| 43 |
+
pr o
|
| 44 |
+
per ators
|
| 45 |
+
upp lier
|
| 46 |
+
I n
|
| 47 |
+
J srs
|
| 48 |
+
S u
|
| 49 |
+
a j
|
| 50 |
+
a ys
|
| 51 |
+
b s
|
| 52 |
+
d u
|
| 53 |
+
d y
|
| 54 |
+
e m
|
| 55 |
e n
|
| 56 |
e s
|
| 57 |
+
e w
|
| 58 |
e y
|
| 59 |
+
e an
|
| 60 |
e ct
|
| 61 |
+
f ic
|
| 62 |
+
g i
|
| 63 |
g s
|
| 64 |
+
g at
|
| 65 |
+
g is
|
| 66 |
+
h ic
|
| 67 |
+
i bu
|
| 68 |
i dy
|
| 69 |
i en
|
| 70 |
+
i fic
|
| 71 |
j ect
|
| 72 |
k ed
|
| 73 |
l in
|
|
|
|
| 74 |
m an
|
| 75 |
+
n tr
|
| 76 |
+
n ation
|
| 77 |
+
o u
|
| 78 |
+
o int
|
| 79 |
+
o perators
|
|
|
|
|
|
|
| 80 |
s in
|
| 81 |
+
s upplier
|
| 82 |
+
s gat
|
| 83 |
+
t or
|
| 84 |
+
t ed
|
| 85 |
+
t em
|
| 86 |
+
t ific
|
| 87 |
u e
|
| 88 |
+
v er
|
| 89 |
v al
|
| 90 |
+
w hic
|
| 91 |
+
Ġ or
|
| 92 |
+
Ġ re
|
| 93 |
Ġ bu
|
| 94 |
+
Ġ int
|
| 95 |
+
Ġ pro
|
| 96 |
+
Ġ gi
|
| 97 |
+
Ġ nation
|
|
|
|
|
|
|
|
|
|
| 98 |
Ġ val
|
| 99 |
+
Ġ whic
|
| 100 |
+
er lin
|
| 101 |
+
er tific
|
| 102 |
+
in gs
|
| 103 |
+
on tr
|
| 104 |
+
on ey
|
| 105 |
+
Ġo perators
|
| 106 |
Ġo man
|
| 107 |
+
al e
|
| 108 |
al s
|
| 109 |
+
tr ation
|
| 110 |
+
Ġc ou
|
| 111 |
+
Ġc ertific
|
| 112 |
+
Ġc ontr
|
| 113 |
+
Ġm aj
|
| 114 |
+
Ġm ean
|
| 115 |
+
Ġm oney
|
| 116 |
+
Ġs ys
|
| 117 |
+
Ġs upplier
|
| 118 |
+
Ġs ale
|
| 119 |
+
ct s
|
| 120 |
+
Ġj srs
|
| 121 |
+
Ġj oint
|
| 122 |
+
Ġth e
|
| 123 |
+
Ġth ings
|
| 124 |
+
pro du
|
| 125 |
Su bs
|
| 126 |
+
es sgat
|
| 127 |
+
ew ays
|
| 128 |
+
gis tration
|
| 129 |
+
ibu tor
|
| 130 |
+
ien ted
|
| 131 |
+
ntr y
|
| 132 |
+
sin essgat
|
| 133 |
+
supplier s
|
| 134 |
+
Ġor iented
|
| 135 |
+
Ġre gistration
|
| 136 |
+
Ġbu sinessgat
|
| 137 |
+
Ġint erlin
|
| 138 |
+
Ġpro ject
|
| 139 |
+
Ġgi ver
|
| 140 |
+
Ġnation als
|
| 141 |
Ġval ue
|
| 142 |
+
Ġwhic h
|
| 143 |
+
Ġcou ntry
|
| 144 |
+
Ġcertific ation
|
| 145 |
+
Ġcontr ibutor
|
| 146 |
+
Ġmaj or
|
| 147 |
+
Ġmean s
|
| 148 |
+
Ġsys tem
|
| 149 |
+
produ cts
|
| 150 |
Subs idy
|
| 151 |
+
Ġbusinessgat eways
|
| 152 |
Ġinterlin ked
|
|
|
tokenizer.json
CHANGED
|
@@ -1,11 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
-
"truncation":
|
| 4 |
-
"direction": "Right",
|
| 5 |
-
"max_length": 40,
|
| 6 |
-
"strategy": "LongestFirst",
|
| 7 |
-
"stride": 0
|
| 8 |
-
},
|
| 9 |
"padding": null,
|
| 10 |
"added_tokens": [
|
| 11 |
{
|
|
@@ -303,194 +298,310 @@
|
|
| 303 |
"Ł": 254,
|
| 304 |
"ł": 255,
|
| 305 |
"Ń": 256,
|
| 306 |
-
"
|
| 307 |
-
"
|
| 308 |
-
"
|
| 309 |
-
"
|
| 310 |
-
"
|
| 311 |
-
"
|
| 312 |
-
"
|
| 313 |
"Ġa": 264,
|
| 314 |
-
"
|
| 315 |
-
"
|
| 316 |
-
"
|
| 317 |
-
"
|
| 318 |
-
"
|
| 319 |
-
"
|
| 320 |
-
"
|
| 321 |
-
"
|
| 322 |
-
"
|
| 323 |
-
"
|
| 324 |
-
"
|
| 325 |
-
"
|
| 326 |
-
"
|
| 327 |
-
"
|
| 328 |
-
"
|
| 329 |
-
"
|
| 330 |
-
"
|
| 331 |
-
"
|
| 332 |
-
"
|
| 333 |
-
"
|
| 334 |
-
"
|
| 335 |
-
"
|
| 336 |
-
"
|
| 337 |
-
"
|
| 338 |
-
"
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
-
"
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
"
|
| 345 |
-
"
|
| 346 |
-
"
|
| 347 |
-
"
|
| 348 |
-
"
|
| 349 |
-
"
|
| 350 |
-
"
|
| 351 |
-
"
|
| 352 |
-
"
|
| 353 |
-
"
|
| 354 |
-
"
|
| 355 |
-
"
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
-
"
|
| 362 |
-
"
|
| 363 |
-
"
|
| 364 |
-
"
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
-
"
|
| 373 |
-
"
|
| 374 |
-
"
|
| 375 |
-
"
|
| 376 |
-
"
|
| 377 |
-
"
|
| 378 |
-
"
|
| 379 |
-
"
|
| 380 |
-
"
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
-
"
|
| 384 |
-
"
|
| 385 |
-
"
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
-
"
|
| 389 |
-
"
|
| 390 |
-
"
|
| 391 |
-
"
|
| 392 |
-
"
|
| 393 |
-
"
|
| 394 |
-
"
|
| 395 |
-
"
|
| 396 |
-
"
|
| 397 |
-
"
|
| 398 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
},
|
| 400 |
"merges": [
|
|
|
|
|
|
|
|
|
|
| 401 |
"i n",
|
| 402 |
"i s",
|
| 403 |
-
"
|
| 404 |
"Ġ o",
|
|
|
|
| 405 |
"Ġ is",
|
| 406 |
"a l",
|
| 407 |
-
"
|
| 408 |
-
"
|
| 409 |
-
"
|
| 410 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
"a n",
|
| 412 |
-
"a t",
|
| 413 |
-
"a y",
|
| 414 |
-
"a te",
|
| 415 |
-
"b s",
|
| 416 |
"b u",
|
| 417 |
-
"c h",
|
| 418 |
-
"c o",
|
| 419 |
"c t",
|
| 420 |
-
"d y",
|
| 421 |
"e d",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
"e n",
|
| 423 |
"e s",
|
|
|
|
| 424 |
"e y",
|
|
|
|
| 425 |
"e ct",
|
| 426 |
-
"f
|
|
|
|
| 427 |
"g s",
|
| 428 |
-
"g
|
| 429 |
-
"
|
| 430 |
-
"h
|
| 431 |
-
"i
|
| 432 |
"i dy",
|
| 433 |
"i en",
|
| 434 |
-
"
|
| 435 |
"j ect",
|
| 436 |
"k ed",
|
| 437 |
"l in",
|
| 438 |
-
"m on",
|
| 439 |
"m an",
|
| 440 |
-
"n
|
| 441 |
-
"n
|
| 442 |
-
"o
|
| 443 |
-
"
|
| 444 |
-
"
|
| 445 |
-
"r ien",
|
| 446 |
-
"r lin",
|
| 447 |
"s in",
|
| 448 |
-
"s
|
| 449 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
"u e",
|
| 451 |
-
"
|
| 452 |
"v al",
|
| 453 |
-
"w
|
| 454 |
-
"
|
| 455 |
-
"Ġ
|
| 456 |
"Ġ bu",
|
| 457 |
-
"Ġ
|
| 458 |
-
"Ġ
|
| 459 |
-
"Ġ
|
| 460 |
-
"Ġ
|
| 461 |
-
"Ġ nat",
|
| 462 |
-
"Ġ pr",
|
| 463 |
-
"Ġ thin",
|
| 464 |
"Ġ val",
|
| 465 |
-
"Ġ
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
-
"
|
|
|
|
|
|
|
| 470 |
"Ġo man",
|
| 471 |
-
"
|
| 472 |
"al s",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
"Su bs",
|
| 474 |
-
"es
|
| 475 |
-
"
|
| 476 |
-
"
|
| 477 |
-
"
|
| 478 |
-
"
|
| 479 |
-
"
|
| 480 |
-
"
|
| 481 |
-
"
|
| 482 |
-
"
|
| 483 |
-
"
|
| 484 |
-
"
|
| 485 |
-
"
|
| 486 |
-
"
|
| 487 |
-
"
|
|
|
|
| 488 |
"Ġval ue",
|
| 489 |
-
"
|
| 490 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
"Subs idy",
|
| 492 |
-
"
|
| 493 |
-
"
|
| 494 |
]
|
| 495 |
}
|
| 496 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
+
"truncation": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"padding": null,
|
| 5 |
"added_tokens": [
|
| 6 |
{
|
|
|
|
| 298 |
"Ł": 254,
|
| 299 |
"ł": 255,
|
| 300 |
"Ń": 256,
|
| 301 |
+
"or": 257,
|
| 302 |
+
"er": 258,
|
| 303 |
+
"at": 259,
|
| 304 |
+
"in": 260,
|
| 305 |
+
"is": 261,
|
| 306 |
+
"on": 262,
|
| 307 |
+
"Ġo": 263,
|
| 308 |
"Ġa": 264,
|
| 309 |
+
"Ġis": 265,
|
| 310 |
+
"al": 266,
|
| 311 |
+
"for": 267,
|
| 312 |
+
"ion": 268,
|
| 313 |
+
"re": 269,
|
| 314 |
+
"tr": 270,
|
| 315 |
+
"Ġc": 271,
|
| 316 |
+
"Ġm": 272,
|
| 317 |
+
"Ġs": 273,
|
| 318 |
+
"Ġfor": 274,
|
| 319 |
+
"ation": 275,
|
| 320 |
+
"Ġof": 276,
|
| 321 |
+
"an": 277,
|
| 322 |
+
"bu": 278,
|
| 323 |
+
"ct": 279,
|
| 324 |
+
"ed": 280,
|
| 325 |
+
"ic": 281,
|
| 326 |
+
"ier": 282,
|
| 327 |
+
"lier": 283,
|
| 328 |
+
"pp": 284,
|
| 329 |
+
"pr": 285,
|
| 330 |
+
"per": 286,
|
| 331 |
+
"rs": 287,
|
| 332 |
+
"srs": 288,
|
| 333 |
+
"th": 289,
|
| 334 |
+
"upp": 290,
|
| 335 |
+
"ys": 291,
|
| 336 |
+
"Ġj": 292,
|
| 337 |
+
"Ġth": 293,
|
| 338 |
+
"ors": 294,
|
| 339 |
+
"ators": 295,
|
| 340 |
+
"int": 296,
|
| 341 |
+
"Ġare": 297,
|
| 342 |
+
"pro": 298,
|
| 343 |
+
"perators": 299,
|
| 344 |
+
"upplier": 300,
|
| 345 |
+
"In": 301,
|
| 346 |
+
"Jsrs": 302,
|
| 347 |
+
"Su": 303,
|
| 348 |
+
"aj": 304,
|
| 349 |
+
"ays": 305,
|
| 350 |
+
"bs": 306,
|
| 351 |
+
"du": 307,
|
| 352 |
+
"dy": 308,
|
| 353 |
+
"em": 309,
|
| 354 |
+
"en": 310,
|
| 355 |
+
"es": 311,
|
| 356 |
+
"ew": 312,
|
| 357 |
+
"ey": 313,
|
| 358 |
+
"ean": 314,
|
| 359 |
+
"ect": 315,
|
| 360 |
+
"fic": 316,
|
| 361 |
+
"gi": 317,
|
| 362 |
+
"gs": 318,
|
| 363 |
+
"gat": 319,
|
| 364 |
+
"gis": 320,
|
| 365 |
+
"hic": 321,
|
| 366 |
+
"ibu": 322,
|
| 367 |
+
"idy": 323,
|
| 368 |
+
"ien": 324,
|
| 369 |
+
"ific": 325,
|
| 370 |
+
"ject": 326,
|
| 371 |
+
"ked": 327,
|
| 372 |
+
"lin": 328,
|
| 373 |
+
"man": 329,
|
| 374 |
+
"ntr": 330,
|
| 375 |
+
"nation": 331,
|
| 376 |
+
"ou": 332,
|
| 377 |
+
"oint": 333,
|
| 378 |
+
"operators": 334,
|
| 379 |
+
"sin": 335,
|
| 380 |
+
"supplier": 336,
|
| 381 |
+
"sgat": 337,
|
| 382 |
+
"tor": 338,
|
| 383 |
+
"ted": 339,
|
| 384 |
+
"tem": 340,
|
| 385 |
+
"tific": 341,
|
| 386 |
+
"ue": 342,
|
| 387 |
+
"ver": 343,
|
| 388 |
+
"val": 344,
|
| 389 |
+
"whic": 345,
|
| 390 |
+
"Ġor": 346,
|
| 391 |
+
"Ġre": 347,
|
| 392 |
+
"Ġbu": 348,
|
| 393 |
+
"Ġint": 349,
|
| 394 |
+
"Ġpro": 350,
|
| 395 |
+
"Ġgi": 351,
|
| 396 |
+
"Ġnation": 352,
|
| 397 |
+
"Ġval": 353,
|
| 398 |
+
"Ġwhic": 354,
|
| 399 |
+
"erlin": 355,
|
| 400 |
+
"ertific": 356,
|
| 401 |
+
"ings": 357,
|
| 402 |
+
"ontr": 358,
|
| 403 |
+
"oney": 359,
|
| 404 |
+
"Ġoperators": 360,
|
| 405 |
+
"Ġoman": 361,
|
| 406 |
+
"ale": 362,
|
| 407 |
+
"als": 363,
|
| 408 |
+
"tration": 364,
|
| 409 |
+
"Ġcou": 365,
|
| 410 |
+
"Ġcertific": 366,
|
| 411 |
+
"Ġcontr": 367,
|
| 412 |
+
"Ġmaj": 368,
|
| 413 |
+
"Ġmean": 369,
|
| 414 |
+
"Ġmoney": 370,
|
| 415 |
+
"Ġsys": 371,
|
| 416 |
+
"Ġsupplier": 372,
|
| 417 |
+
"Ġsale": 373,
|
| 418 |
+
"cts": 374,
|
| 419 |
+
"Ġjsrs": 375,
|
| 420 |
+
"Ġjoint": 376,
|
| 421 |
+
"Ġthe": 377,
|
| 422 |
+
"Ġthings": 378,
|
| 423 |
+
"produ": 379,
|
| 424 |
+
"Subs": 380,
|
| 425 |
+
"essgat": 381,
|
| 426 |
+
"eways": 382,
|
| 427 |
+
"gistration": 383,
|
| 428 |
+
"ibutor": 384,
|
| 429 |
+
"iented": 385,
|
| 430 |
+
"ntry": 386,
|
| 431 |
+
"sinessgat": 387,
|
| 432 |
+
"suppliers": 388,
|
| 433 |
+
"Ġoriented": 389,
|
| 434 |
+
"Ġregistration": 390,
|
| 435 |
+
"Ġbusinessgat": 391,
|
| 436 |
+
"Ġinterlin": 392,
|
| 437 |
+
"Ġproject": 393,
|
| 438 |
+
"Ġgiver": 394,
|
| 439 |
+
"Ġnationals": 395,
|
| 440 |
+
"Ġvalue": 396,
|
| 441 |
+
"Ġwhich": 397,
|
| 442 |
+
"Ġcountry": 398,
|
| 443 |
+
"Ġcertification": 399,
|
| 444 |
+
"Ġcontributor": 400,
|
| 445 |
+
"Ġmajor": 401,
|
| 446 |
+
"Ġmeans": 402,
|
| 447 |
+
"Ġsystem": 403,
|
| 448 |
+
"products": 404,
|
| 449 |
+
"Subsidy": 405,
|
| 450 |
+
"Ġbusinessgateways": 406,
|
| 451 |
+
"Ġinterlinked": 407
|
| 452 |
},
|
| 453 |
"merges": [
|
| 454 |
+
"o r",
|
| 455 |
+
"e r",
|
| 456 |
+
"a t",
|
| 457 |
"i n",
|
| 458 |
"i s",
|
| 459 |
+
"o n",
|
| 460 |
"Ġ o",
|
| 461 |
+
"Ġ a",
|
| 462 |
"Ġ is",
|
| 463 |
"a l",
|
| 464 |
+
"f or",
|
| 465 |
+
"i on",
|
| 466 |
+
"r e",
|
| 467 |
+
"t r",
|
| 468 |
+
"Ġ c",
|
| 469 |
+
"Ġ m",
|
| 470 |
+
"Ġ s",
|
| 471 |
+
"Ġ for",
|
| 472 |
+
"at ion",
|
| 473 |
+
"Ġo f",
|
| 474 |
"a n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
"b u",
|
|
|
|
|
|
|
| 476 |
"c t",
|
|
|
|
| 477 |
"e d",
|
| 478 |
+
"i c",
|
| 479 |
+
"i er",
|
| 480 |
+
"l ier",
|
| 481 |
+
"p p",
|
| 482 |
+
"p r",
|
| 483 |
+
"p er",
|
| 484 |
+
"r s",
|
| 485 |
+
"s rs",
|
| 486 |
+
"t h",
|
| 487 |
+
"u pp",
|
| 488 |
+
"y s",
|
| 489 |
+
"Ġ j",
|
| 490 |
+
"Ġ th",
|
| 491 |
+
"or s",
|
| 492 |
+
"at ors",
|
| 493 |
+
"in t",
|
| 494 |
+
"Ġa re",
|
| 495 |
+
"pr o",
|
| 496 |
+
"per ators",
|
| 497 |
+
"upp lier",
|
| 498 |
+
"I n",
|
| 499 |
+
"J srs",
|
| 500 |
+
"S u",
|
| 501 |
+
"a j",
|
| 502 |
+
"a ys",
|
| 503 |
+
"b s",
|
| 504 |
+
"d u",
|
| 505 |
+
"d y",
|
| 506 |
+
"e m",
|
| 507 |
"e n",
|
| 508 |
"e s",
|
| 509 |
+
"e w",
|
| 510 |
"e y",
|
| 511 |
+
"e an",
|
| 512 |
"e ct",
|
| 513 |
+
"f ic",
|
| 514 |
+
"g i",
|
| 515 |
"g s",
|
| 516 |
+
"g at",
|
| 517 |
+
"g is",
|
| 518 |
+
"h ic",
|
| 519 |
+
"i bu",
|
| 520 |
"i dy",
|
| 521 |
"i en",
|
| 522 |
+
"i fic",
|
| 523 |
"j ect",
|
| 524 |
"k ed",
|
| 525 |
"l in",
|
|
|
|
| 526 |
"m an",
|
| 527 |
+
"n tr",
|
| 528 |
+
"n ation",
|
| 529 |
+
"o u",
|
| 530 |
+
"o int",
|
| 531 |
+
"o perators",
|
|
|
|
|
|
|
| 532 |
"s in",
|
| 533 |
+
"s upplier",
|
| 534 |
+
"s gat",
|
| 535 |
+
"t or",
|
| 536 |
+
"t ed",
|
| 537 |
+
"t em",
|
| 538 |
+
"t ific",
|
| 539 |
"u e",
|
| 540 |
+
"v er",
|
| 541 |
"v al",
|
| 542 |
+
"w hic",
|
| 543 |
+
"Ġ or",
|
| 544 |
+
"Ġ re",
|
| 545 |
"Ġ bu",
|
| 546 |
+
"Ġ int",
|
| 547 |
+
"Ġ pro",
|
| 548 |
+
"Ġ gi",
|
| 549 |
+
"Ġ nation",
|
|
|
|
|
|
|
|
|
|
| 550 |
"Ġ val",
|
| 551 |
+
"Ġ whic",
|
| 552 |
+
"er lin",
|
| 553 |
+
"er tific",
|
| 554 |
+
"in gs",
|
| 555 |
+
"on tr",
|
| 556 |
+
"on ey",
|
| 557 |
+
"Ġo perators",
|
| 558 |
"Ġo man",
|
| 559 |
+
"al e",
|
| 560 |
"al s",
|
| 561 |
+
"tr ation",
|
| 562 |
+
"Ġc ou",
|
| 563 |
+
"Ġc ertific",
|
| 564 |
+
"Ġc ontr",
|
| 565 |
+
"Ġm aj",
|
| 566 |
+
"Ġm ean",
|
| 567 |
+
"Ġm oney",
|
| 568 |
+
"Ġs ys",
|
| 569 |
+
"Ġs upplier",
|
| 570 |
+
"Ġs ale",
|
| 571 |
+
"ct s",
|
| 572 |
+
"Ġj srs",
|
| 573 |
+
"Ġj oint",
|
| 574 |
+
"Ġth e",
|
| 575 |
+
"Ġth ings",
|
| 576 |
+
"pro du",
|
| 577 |
"Su bs",
|
| 578 |
+
"es sgat",
|
| 579 |
+
"ew ays",
|
| 580 |
+
"gis tration",
|
| 581 |
+
"ibu tor",
|
| 582 |
+
"ien ted",
|
| 583 |
+
"ntr y",
|
| 584 |
+
"sin essgat",
|
| 585 |
+
"supplier s",
|
| 586 |
+
"Ġor iented",
|
| 587 |
+
"Ġre gistration",
|
| 588 |
+
"Ġbu sinessgat",
|
| 589 |
+
"Ġint erlin",
|
| 590 |
+
"Ġpro ject",
|
| 591 |
+
"Ġgi ver",
|
| 592 |
+
"Ġnation als",
|
| 593 |
"Ġval ue",
|
| 594 |
+
"Ġwhic h",
|
| 595 |
+
"Ġcou ntry",
|
| 596 |
+
"Ġcertific ation",
|
| 597 |
+
"Ġcontr ibutor",
|
| 598 |
+
"Ġmaj or",
|
| 599 |
+
"Ġmean s",
|
| 600 |
+
"Ġsys tem",
|
| 601 |
+
"produ cts",
|
| 602 |
"Subs idy",
|
| 603 |
+
"Ġbusinessgat eways",
|
| 604 |
+
"Ġinterlin ked"
|
| 605 |
]
|
| 606 |
}
|
| 607 |
}
|
vocab.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"
|
|
|
|
| 1 |
+
{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"or":257,"er":258,"at":259,"in":260,"is":261,"on":262,"Ġo":263,"Ġa":264,"Ġis":265,"al":266,"for":267,"ion":268,"re":269,"tr":270,"Ġc":271,"Ġm":272,"Ġs":273,"Ġfor":274,"ation":275,"Ġof":276,"an":277,"bu":278,"ct":279,"ed":280,"ic":281,"ier":282,"lier":283,"pp":284,"pr":285,"per":286,"rs":287,"srs":288,"th":289,"upp":290,"ys":291,"Ġj":292,"Ġth":293,"ors":294,"ators":295,"int":296,"Ġare":297,"pro":298,"perators":299,"upplier":300,"In":301,"Jsrs":302,"Su":303,"aj":304,"ays":305,"bs":306,"du":307,"dy":308,"em":309,"en":310,"es":311,"ew":312,"ey":313,"ean":314,"ect":315,"fic":316,"gi":317,"gs":318,"gat":319,"gis":320,"hic":321,"ibu":322,"idy":323,"ien":324,"ific":325,"ject":326,"ked":327,"lin":328,"man":329,"ntr":330,"nation":331,"ou":332,"oint":333,"operators":334,"sin":335,"supplier":336,"sgat":337,"tor":338,"ted":339,"tem":340,"tific":341,"ue":342,"ver":343,"val":344,"whic":345,"Ġor":346,"Ġre":347,"Ġbu":348,"Ġint":349,"Ġpro":350,"Ġgi":351,"Ġnation":352,"Ġval":353,"Ġwhic":354,"erlin":355,"ertific":356,"ings":357,"ontr":358,"oney":359,"Ġoperators":360,"Ġoman":361,"ale":362,"als":363,"tration":364,"Ġcou":365,"Ġcertific":366,"Ġcontr":367,"Ġmaj":368,"Ġmean":369,"Ġmoney":370,"Ġsys":371,"Ġsupplier":372,"Ġsale":373,"cts":374,"Ġjsrs":375,"Ġjoint":376,"Ġthe":377,"Ġthings":378,"produ":379,"Subs":380,"essgat":381,"eways":382,"gistration":383,"ibutor":384,"iented":385,"ntry":386,"sinessgat":387,"suppliers":388,"Ġoriented":389,"Ġregistration":390,"Ġbusinessgat":391,"Ġinterlin":392,"Ġproject":393,"Ġgiver":394,"Ġnationals":395,"Ġvalue":396,"Ġwhich":397,"Ġcountry":398,"Ġcertification":399,"Ġcontributor":400,"Ġmajor":401,"Ġmeans":402,"Ġsystem":403,"products":404,"Subsidy":405,"Ġbusinessgateways":406,"Ġinterlinked":407}
|