KathirKs committed on
Commit
b0ff7ea
·
verified ·
1 Parent(s): 7d8e207

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +126 -0
  2. tokenizer_config.json +112 -0
tokenizer.json CHANGED
@@ -43,6 +43,132 @@
43
  "rstrip": false,
44
  "normalized": false,
45
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "normalizer": {
 
43
  "rstrip": false,
44
  "normalized": false,
45
  "special": true
46
+ },
47
+ {
48
+ "id": 28733,
49
+ "content": "-",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": true,
54
+ "special": false
55
+ },
56
+ {
57
+ "id": 28734,
58
+ "content": "0",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": true,
63
+ "special": false
64
+ },
65
+ {
66
+ "id": 28736,
67
+ "content": "*",
68
+ "single_word": false,
69
+ "lstrip": false,
70
+ "rstrip": false,
71
+ "normalized": true,
72
+ "special": false
73
+ },
74
+ {
75
+ "id": 28740,
76
+ "content": "1",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": true,
81
+ "special": false
82
+ },
83
+ {
84
+ "id": 28746,
85
+ "content": "=",
86
+ "single_word": false,
87
+ "lstrip": false,
88
+ "rstrip": false,
89
+ "normalized": true,
90
+ "special": false
91
+ },
92
+ {
93
+ "id": 28750,
94
+ "content": "2",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": true,
99
+ "special": false
100
+ },
101
+ {
102
+ "id": 28770,
103
+ "content": "3",
104
+ "single_word": false,
105
+ "lstrip": false,
106
+ "rstrip": false,
107
+ "normalized": true,
108
+ "special": false
109
+ },
110
+ {
111
+ "id": 28774,
112
+ "content": "9",
113
+ "single_word": false,
114
+ "lstrip": false,
115
+ "rstrip": false,
116
+ "normalized": true,
117
+ "special": false
118
+ },
119
+ {
120
+ "id": 28781,
121
+ "content": "4",
122
+ "single_word": false,
123
+ "lstrip": false,
124
+ "rstrip": false,
125
+ "normalized": true,
126
+ "special": false
127
+ },
128
+ {
129
+ "id": 28782,
130
+ "content": "5",
131
+ "single_word": false,
132
+ "lstrip": false,
133
+ "rstrip": false,
134
+ "normalized": true,
135
+ "special": false
136
+ },
137
+ {
138
+ "id": 28783,
139
+ "content": "8",
140
+ "single_word": false,
141
+ "lstrip": false,
142
+ "rstrip": false,
143
+ "normalized": true,
144
+ "special": false
145
+ },
146
+ {
147
+ "id": 28784,
148
+ "content": "6",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": true,
153
+ "special": false
154
+ },
155
+ {
156
+ "id": 28787,
157
+ "content": "7",
158
+ "single_word": false,
159
+ "lstrip": false,
160
+ "rstrip": false,
161
+ "normalized": true,
162
+ "special": false
163
+ },
164
+ {
165
+ "id": 32000,
166
+ "content": " ",
167
+ "single_word": false,
168
+ "lstrip": false,
169
+ "rstrip": false,
170
+ "normalized": true,
171
+ "special": false
172
  }
173
  ],
174
  "normalizer": {
tokenizer_config.json CHANGED
@@ -23,6 +23,118 @@
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  },
28
  "additional_special_tokens": [],
 
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
+ },
27
+ "28733": {
28
+ "content": "-",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "28734": {
36
+ "content": "0",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "28736": {
44
+ "content": "*",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "28740": {
52
+ "content": "1",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "28746": {
60
+ "content": "=",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "28750": {
68
+ "content": "2",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "28770": {
76
+ "content": "3",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "28774": {
84
+ "content": "9",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "28781": {
92
+ "content": "4",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "28782": {
100
+ "content": "5",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "28783": {
108
+ "content": "8",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "28784": {
116
+ "content": "6",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "28787": {
124
+ "content": "7",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "32000": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
  }
139
  },
140
  "additional_special_tokens": [],