pianistprogrammer commited on
Commit
9852ecb
·
verified ·
1 Parent(s): 4cfccc4

Upload vocab.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. vocab.json +198 -0
vocab.json ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "char2idx": {
3
+ "[PAD]": 0,
4
+ "[UNK]": 1,
5
+ "[CLS]": 2,
6
+ "[SEP]": 3,
7
+ "[MASK]": 4,
8
+ " ": 5,
9
+ "!": 6,
10
+ "\"": 7,
11
+ "#": 8,
12
+ "$": 9,
13
+ "&": 10,
14
+ "'": 11,
15
+ "(": 12,
16
+ ")": 13,
17
+ "*": 14,
18
+ "+": 15,
19
+ ",": 16,
20
+ "-": 17,
21
+ ".": 18,
22
+ "/": 19,
23
+ "0": 20,
24
+ "1": 21,
25
+ "2": 22,
26
+ "3": 23,
27
+ "4": 24,
28
+ "5": 25,
29
+ "6": 26,
30
+ "7": 27,
31
+ "8": 28,
32
+ "9": 29,
33
+ ":": 30,
34
+ ";": 31,
35
+ "<": 32,
36
+ "=": 33,
37
+ ">": 34,
38
+ "?": 35,
39
+ "@": 36,
40
+ "A": 37,
41
+ "B": 38,
42
+ "C": 39,
43
+ "D": 40,
44
+ "E": 41,
45
+ "F": 42,
46
+ "G": 43,
47
+ "H": 44,
48
+ "I": 45,
49
+ "J": 46,
50
+ "K": 47,
51
+ "L": 48,
52
+ "M": 49,
53
+ "N": 50,
54
+ "O": 51,
55
+ "P": 52,
56
+ "Q": 53,
57
+ "R": 54,
58
+ "S": 55,
59
+ "T": 56,
60
+ "U": 57,
61
+ "V": 58,
62
+ "W": 59,
63
+ "X": 60,
64
+ "Y": 61,
65
+ "Z": 62,
66
+ "[": 63,
67
+ "\\": 64,
68
+ "]": 65,
69
+ "^": 66,
70
+ "_": 67,
71
+ "a": 68,
72
+ "b": 69,
73
+ "c": 70,
74
+ "d": 71,
75
+ "e": 72,
76
+ "f": 73,
77
+ "g": 74,
78
+ "h": 75,
79
+ "i": 76,
80
+ "j": 77,
81
+ "k": 78,
82
+ "l": 79,
83
+ "m": 80,
84
+ "n": 81,
85
+ "o": 82,
86
+ "p": 83,
87
+ "q": 84,
88
+ "r": 85,
89
+ "s": 86,
90
+ "t": 87,
91
+ "u": 88,
92
+ "v": 89,
93
+ "w": 90,
94
+ "x": 91,
95
+ "y": 92,
96
+ "z": 93,
97
+ "{": 94,
98
+ "|": 95,
99
+ "}": 96,
100
+ "~": 97
101
+ },
102
+ "char_freq": {
103
+ "A": 2773205,
104
+ "3": 716683,
105
+ " ": 14189206,
106
+ "B": 2553267,
107
+ "D": 934338,
108
+ "F": 1212887,
109
+ "|": 3990587,
110
+ "E": 956673,
111
+ "G": 1997121,
112
+ ",": 250436,
113
+ "2": 2938490,
114
+ "d": 2640325,
115
+ ":": 523581,
116
+ "f": 1571735,
117
+ "a": 833393,
118
+ "b": 292192,
119
+ "e": 2063799,
120
+ "z": 159953,
121
+ "g": 1311271,
122
+ "c": 2075262,
123
+ "/": 1231467,
124
+ "^": 254215,
125
+ ">": 660780,
126
+ "(": 611072,
127
+ "{": 123308,
128
+ "}": 123293,
129
+ "4": 304812,
130
+ ")": 443168,
131
+ "]": 243890,
132
+ "\"": 1643763,
133
+ "C": 280564,
134
+ "#": 15741,
135
+ "-": 80632,
136
+ "7": 142113,
137
+ "m": 159658,
138
+ "?": 544,
139
+ "1": 79657,
140
+ "!": 70950,
141
+ "r": 35591,
142
+ "t": 35597,
143
+ "_": 35830,
144
+ ".": 124624,
145
+ "v": 11644,
146
+ "=": 87849,
147
+ "x": 1580,
148
+ "8": 20556,
149
+ "~": 413,
150
+ "'": 73806,
151
+ "T": 35970,
152
+ "s": 16028,
153
+ "n": 17698,
154
+ "o": 18832,
155
+ "i": 26117,
156
+ "S": 9635,
157
+ "<": 105070,
158
+ "W": 692,
159
+ "l": 15109,
160
+ "[": 177166,
161
+ "M": 27314,
162
+ "6": 55467,
163
+ "N": 2171,
164
+ "w": 4436,
165
+ "K": 20397,
166
+ "O": 5944,
167
+ "P": 4256,
168
+ "u": 19000,
169
+ "k": 1939,
170
+ "p": 7741,
171
+ "Q": 3008,
172
+ "0": 3943,
173
+ "V": 1658,
174
+ "R": 939,
175
+ "h": 5484,
176
+ "y": 3142,
177
+ "5": 5052,
178
+ "9": 3226,
179
+ "\\": 1077,
180
+ "+": 1175,
181
+ "*": 2494,
182
+ "U": 309,
183
+ "q": 435,
184
+ "L": 675,
185
+ "X": 107,
186
+ "Y": 30,
187
+ "H": 431,
188
+ "I": 1429,
189
+ "J": 314,
190
+ "&": 139,
191
+ "$": 337,
192
+ "j": 632,
193
+ "@": 31,
194
+ "Z": 37,
195
+ ";": 154,
196
+ "`": 4
197
+ }
198
+ }