zluvolyote committed on
Commit
9f599a3
·
1 Parent(s): e9d8869

add tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +9 -83
tokenizer.json CHANGED
@@ -61,81 +61,7 @@
61
  ]
62
  },
63
  "pre_tokenizer": null,
64
- "post_processor": {
65
- "type": "TemplateProcessing",
66
- "single": [
67
- {
68
- "SpecialToken": {
69
- "id": "[CLS]",
70
- "type_id": 0
71
- }
72
- },
73
- {
74
- "Sequence": {
75
- "id": "A",
76
- "type_id": 0
77
- }
78
- },
79
- {
80
- "SpecialToken": {
81
- "id": "[SEP]",
82
- "type_id": 0
83
- }
84
- }
85
- ],
86
- "pair": [
87
- {
88
- "SpecialToken": {
89
- "id": "[CLS]",
90
- "type_id": 0
91
- }
92
- },
93
- {
94
- "Sequence": {
95
- "id": "A",
96
- "type_id": 0
97
- }
98
- },
99
- {
100
- "SpecialToken": {
101
- "id": "[SEP]",
102
- "type_id": 0
103
- }
104
- },
105
- {
106
- "Sequence": {
107
- "id": "B",
108
- "type_id": 1
109
- }
110
- },
111
- {
112
- "SpecialToken": {
113
- "id": "[SEP]",
114
- "type_id": 1
115
- }
116
- }
117
- ],
118
- "special_tokens": {
119
- "[CLS]": {
120
- "id": "[CLS]",
121
- "ids": [
122
- 2
123
- ],
124
- "tokens": [
125
- "[CLS]"
126
- ]
127
- },
128
- "[SEP]": {
129
- "id": "[SEP]",
130
- "ids": [
131
- 3
132
- ],
133
- "tokens": [
134
- "[SEP]"
135
- ]
136
- }
137
- }
138
- },
139
  "decoder": {
140
  "type": "WordPiece",
141
  "prefix": "##",
@@ -159,13 +85,13 @@
159
  "4": 9,
160
  "5": 10,
161
  "6": 11,
162
- "##1": 12,
163
- "## ": 13,
164
- "##4": 14,
165
- "##2": 15,
166
  "##3": 16,
167
- "##5": 17,
168
- "##6": 18,
169
  "##11": 19,
170
  "##12": 20,
171
  "##1 ": 21,
@@ -499,8 +425,8 @@
499
  "##3331 ": 349,
500
  "##2612 ": 350,
501
  "##2225 ": 351,
502
- "##512": 352,
503
- "##6221 ": 353,
504
  "##514": 354,
505
  "##2252 ": 355,
506
  "##152 ": 356,
 
61
  ]
62
  },
63
  "pre_tokenizer": null,
64
+ "post_processor": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "decoder": {
66
  "type": "WordPiece",
67
  "prefix": "##",
 
85
  "4": 9,
86
  "5": 10,
87
  "6": 11,
88
+ "##4": 12,
89
+ "##2": 13,
90
+ "## ": 14,
91
+ "##1": 15,
92
  "##3": 16,
93
+ "##6": 17,
94
+ "##5": 18,
95
  "##11": 19,
96
  "##12": 20,
97
  "##1 ": 21,
 
425
  "##3331 ": 349,
426
  "##2612 ": 350,
427
  "##2225 ": 351,
428
+ "##6221 ": 352,
429
+ "##512": 353,
430
  "##514": 354,
431
  "##2252 ": 355,
432
  "##152 ": 356,