josem7 committed on
Commit
4381a1a
·
1 Parent(s): faf9faf

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +3 -66
tokenizer.json CHANGED
@@ -31,43 +31,7 @@
31
  "special": true
32
  },
33
  {
34
- "id": 32007,
35
- "content": "▁<PRE>",
36
- "single_word": false,
37
- "lstrip": true,
38
- "rstrip": true,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 32008,
44
- "content": "▁<SUF>",
45
- "single_word": false,
46
- "lstrip": true,
47
- "rstrip": true,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 32009,
53
- "content": "▁<MID>",
54
- "single_word": false,
55
- "lstrip": true,
56
- "rstrip": true,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 32010,
62
- "content": "▁<EOT>",
63
- "single_word": false,
64
- "lstrip": true,
65
- "rstrip": true,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 32016,
71
  "content": "<PAD>",
72
  "single_word": false,
73
  "lstrip": false,
@@ -32179,23 +32143,7 @@
32179
  "왕": 31996,
32180
  "收": 31997,
32181
  "弘": 31998,
32182
- "给": 31999,
32183
- "▁<SU": 32000,
32184
- "▁<SUF": 32001,
32185
- "▁<PRE": 32002,
32186
- "▁<M": 32003,
32187
- "▁<MID": 32004,
32188
- "▁<E": 32005,
32189
- "▁<EOT": 32006,
32190
- "▁<PRE>": 32007,
32191
- "▁<SUF>": 32008,
32192
- "▁<MID>": 32009,
32193
- "▁<EOT>": 32010,
32194
- "▁<EOT><EOT>": 32011,
32195
- "▁<EOT><EOT><EOT>": 32012,
32196
- "▁<EOT><EOT><EOT><EOT>": 32013,
32197
- "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
32198
- "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
32199
  },
32200
  "merges": [
32201
  "▁ t",
@@ -93446,18 +93394,7 @@
93446
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93447
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93448
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93449
- "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
93450
- "▁< SU",
93451
- "▁<SU F",
93452
- "▁< PRE",
93453
- "▁< M",
93454
- "▁<M ID",
93455
- "▁< E",
93456
- "▁<E OT",
93457
- "▁<PRE >",
93458
- "▁<SUF >",
93459
- "▁<MID >",
93460
- "▁<EOT >"
93461
  ]
93462
  }
93463
  }
 
31
  "special": true
32
  },
33
  {
34
+ "id": 32000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "content": "<PAD>",
36
  "single_word": false,
37
  "lstrip": false,
 
32143
  "왕": 31996,
32144
  "收": 31997,
32145
  "弘": 31998,
32146
+ "给": 31999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32147
  },
32148
  "merges": [
32149
  "▁ t",
 
93394
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93395
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93396
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93397
+ "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
 
 
 
93398
  ]
93399
  }
93400
  }