datapaf committed on
Commit
8fea0b2
·
verified ·
1 Parent(s): eb342a3

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +22 -22
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
- "38959": {
7
  "content": "õ",
8
  "lstrip": false,
9
  "normalized": true,
@@ -11,7 +11,7 @@
11
  "single_word": false,
12
  "special": false
13
  },
14
- "38960": {
15
  "content": "÷",
16
  "lstrip": false,
17
  "normalized": true,
@@ -19,7 +19,7 @@
19
  "single_word": false,
20
  "special": false
21
  },
22
- "38961": {
23
  "content": "Á",
24
  "lstrip": false,
25
  "normalized": true,
@@ -27,7 +27,7 @@
27
  "single_word": false,
28
  "special": false
29
  },
30
- "38962": {
31
  "content": "ý",
32
  "lstrip": false,
33
  "normalized": true,
@@ -35,7 +35,7 @@
35
  "single_word": false,
36
  "special": false
37
  },
38
- "38963": {
39
  "content": "À",
40
  "lstrip": false,
41
  "normalized": true,
@@ -43,7 +43,7 @@
43
  "single_word": false,
44
  "special": false
45
  },
46
- "38964": {
47
  "content": "ÿ",
48
  "lstrip": false,
49
  "normalized": true,
@@ -51,7 +51,7 @@
51
  "single_word": false,
52
  "special": false
53
  },
54
- "38965": {
55
  "content": "ø",
56
  "lstrip": false,
57
  "normalized": true,
@@ -59,7 +59,7 @@
59
  "single_word": false,
60
  "special": false
61
  },
62
- "38966": {
63
  "content": "ú",
64
  "lstrip": false,
65
  "normalized": true,
@@ -67,7 +67,7 @@
67
  "single_word": false,
68
  "special": false
69
  },
70
- "38967": {
71
  "content": "þ",
72
  "lstrip": false,
73
  "normalized": true,
@@ -75,7 +75,7 @@
75
  "single_word": false,
76
  "special": false
77
  },
78
- "38968": {
79
  "content": "ü",
80
  "lstrip": false,
81
  "normalized": true,
@@ -83,7 +83,7 @@
83
  "single_word": false,
84
  "special": false
85
  },
86
- "38969": {
87
  "content": "ù",
88
  "lstrip": false,
89
  "normalized": true,
@@ -91,7 +91,7 @@
91
  "single_word": false,
92
  "special": false
93
  },
94
- "38970": {
95
  "content": "ö",
96
  "lstrip": false,
97
  "normalized": true,
@@ -99,7 +99,7 @@
99
  "single_word": false,
100
  "special": false
101
  },
102
- "38971": {
103
  "content": "û",
104
  "lstrip": false,
105
  "normalized": true,
@@ -107,7 +107,7 @@
107
  "single_word": false,
108
  "special": false
109
  },
110
- "38972": {
111
  "content": "<|begin▁of▁sentence|>",
112
  "lstrip": false,
113
  "normalized": true,
@@ -115,7 +115,7 @@
115
  "single_word": false,
116
  "special": true
117
  },
118
- "38973": {
119
  "content": "<|end▁of▁sentence|>",
120
  "lstrip": false,
121
  "normalized": true,
@@ -123,7 +123,7 @@
123
  "single_word": false,
124
  "special": true
125
  },
126
- "38974": {
127
  "content": "<|fim▁hole|>",
128
  "lstrip": false,
129
  "normalized": true,
@@ -131,7 +131,7 @@
131
  "single_word": false,
132
  "special": false
133
  },
134
- "38975": {
135
  "content": "<|fim▁begin|>",
136
  "lstrip": false,
137
  "normalized": true,
@@ -139,7 +139,7 @@
139
  "single_word": false,
140
  "special": false
141
  },
142
- "38976": {
143
  "content": "<|fim▁end|>",
144
  "lstrip": false,
145
  "normalized": true,
@@ -147,7 +147,7 @@
147
  "single_word": false,
148
  "special": false
149
  },
150
- "38977": {
151
  "content": "<pad>",
152
  "lstrip": false,
153
  "normalized": true,
@@ -155,7 +155,7 @@
155
  "single_word": false,
156
  "special": false
157
  },
158
- "38978": {
159
  "content": "<|User|>",
160
  "lstrip": false,
161
  "normalized": true,
@@ -163,7 +163,7 @@
163
  "single_word": false,
164
  "special": false
165
  },
166
- "38979": {
167
  "content": "<|Assistant|>",
168
  "lstrip": false,
169
  "normalized": true,
@@ -171,7 +171,7 @@
171
  "single_word": false,
172
  "special": false
173
  },
174
- "38980": {
175
  "content": "<|EOT|>",
176
  "lstrip": false,
177
  "normalized": true,
 
3
  "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
+ "39861": {
7
  "content": "õ",
8
  "lstrip": false,
9
  "normalized": true,
 
11
  "single_word": false,
12
  "special": false
13
  },
14
+ "39862": {
15
  "content": "÷",
16
  "lstrip": false,
17
  "normalized": true,
 
19
  "single_word": false,
20
  "special": false
21
  },
22
+ "39863": {
23
  "content": "Á",
24
  "lstrip": false,
25
  "normalized": true,
 
27
  "single_word": false,
28
  "special": false
29
  },
30
+ "39864": {
31
  "content": "ý",
32
  "lstrip": false,
33
  "normalized": true,
 
35
  "single_word": false,
36
  "special": false
37
  },
38
+ "39865": {
39
  "content": "À",
40
  "lstrip": false,
41
  "normalized": true,
 
43
  "single_word": false,
44
  "special": false
45
  },
46
+ "39866": {
47
  "content": "ÿ",
48
  "lstrip": false,
49
  "normalized": true,
 
51
  "single_word": false,
52
  "special": false
53
  },
54
+ "39867": {
55
  "content": "ø",
56
  "lstrip": false,
57
  "normalized": true,
 
59
  "single_word": false,
60
  "special": false
61
  },
62
+ "39868": {
63
  "content": "ú",
64
  "lstrip": false,
65
  "normalized": true,
 
67
  "single_word": false,
68
  "special": false
69
  },
70
+ "39869": {
71
  "content": "þ",
72
  "lstrip": false,
73
  "normalized": true,
 
75
  "single_word": false,
76
  "special": false
77
  },
78
+ "39870": {
79
  "content": "ü",
80
  "lstrip": false,
81
  "normalized": true,
 
83
  "single_word": false,
84
  "special": false
85
  },
86
+ "39871": {
87
  "content": "ù",
88
  "lstrip": false,
89
  "normalized": true,
 
91
  "single_word": false,
92
  "special": false
93
  },
94
+ "39872": {
95
  "content": "ö",
96
  "lstrip": false,
97
  "normalized": true,
 
99
  "single_word": false,
100
  "special": false
101
  },
102
+ "39873": {
103
  "content": "û",
104
  "lstrip": false,
105
  "normalized": true,
 
107
  "single_word": false,
108
  "special": false
109
  },
110
+ "39874": {
111
  "content": "<|begin▁of▁sentence|>",
112
  "lstrip": false,
113
  "normalized": true,
 
115
  "single_word": false,
116
  "special": true
117
  },
118
+ "39875": {
119
  "content": "<|end▁of▁sentence|>",
120
  "lstrip": false,
121
  "normalized": true,
 
123
  "single_word": false,
124
  "special": true
125
  },
126
+ "39876": {
127
  "content": "<|fim▁hole|>",
128
  "lstrip": false,
129
  "normalized": true,
 
131
  "single_word": false,
132
  "special": false
133
  },
134
+ "39877": {
135
  "content": "<|fim▁begin|>",
136
  "lstrip": false,
137
  "normalized": true,
 
139
  "single_word": false,
140
  "special": false
141
  },
142
+ "39878": {
143
  "content": "<|fim▁end|>",
144
  "lstrip": false,
145
  "normalized": true,
 
147
  "single_word": false,
148
  "special": false
149
  },
150
+ "39879": {
151
  "content": "<pad>",
152
  "lstrip": false,
153
  "normalized": true,
 
155
  "single_word": false,
156
  "special": false
157
  },
158
+ "39880": {
159
  "content": "<|User|>",
160
  "lstrip": false,
161
  "normalized": true,
 
163
  "single_word": false,
164
  "special": false
165
  },
166
+ "39881": {
167
  "content": "<|Assistant|>",
168
  "lstrip": false,
169
  "normalized": true,
 
171
  "single_word": false,
172
  "special": false
173
  },
174
+ "39882": {
175
  "content": "<|EOT|>",
176
  "lstrip": false,
177
  "normalized": true,