sanchit-gandhi commited on
Commit
65112aa
·
1 Parent(s): c798b5c

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -0
  2. tokenizer_config.json +12 -0
  3. vocab.json +177 -0
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "pad_token": "|",
3
+ "unk_token": "<unk>"
4
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_blank": true,
3
+ "clean_up_tokenization_spaces": true,
4
+ "is_uroman": false,
5
+ "language": "kqy",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "normalize": true,
8
+ "pad_token": "|",
9
+ "phonemize": false,
10
+ "tokenizer_class": "VitsTokenizer",
11
+ "unk_token": "<unk>"
12
+ }
vocab.json ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ " ": 174,
3
+ "-": 167,
4
+ "|": 0,
5
+ "ሀ": 13,
6
+ "ሁ": 82,
7
+ "ሂ": 153,
8
+ "ሃ": 108,
9
+ "ሄ": 78,
10
+ "ህ": 9,
11
+ "ሆ": 121,
12
+ "ለ": 47,
13
+ "ሉ": 94,
14
+ "ሊ": 163,
15
+ "ላ": 81,
16
+ "ሌ": 69,
17
+ "ል": 55,
18
+ "ሎ": 53,
19
+ "መ": 30,
20
+ "ሙ": 86,
21
+ "ሚ": 112,
22
+ "ማ": 23,
23
+ "ሜ": 54,
24
+ "ም": 46,
25
+ "ሞ": 65,
26
+ "ሠ": 120,
27
+ "ሡ": 76,
28
+ "ሣ": 141,
29
+ "ሤ": 60,
30
+ "ሥ": 36,
31
+ "ሦ": 83,
32
+ "ረ": 14,
33
+ "ሩ": 68,
34
+ "ራ": 133,
35
+ "ሬ": 72,
36
+ "ር": 33,
37
+ "ሮ": 41,
38
+ "ሰ": 17,
39
+ "ሱ": 24,
40
+ "ሲ": 116,
41
+ "ሳ": 67,
42
+ "ሴ": 4,
43
+ "ስ": 7,
44
+ "ሶ": 38,
45
+ "ሸ": 48,
46
+ "ሹ": 100,
47
+ "ሺ": 168,
48
+ "ሻ": 132,
49
+ "ሼ": 85,
50
+ "ሽ": 59,
51
+ "ሾ": 113,
52
+ "ቀ": 130,
53
+ "ቁ": 80,
54
+ "ቂ": 162,
55
+ "ቃ": 146,
56
+ "ቄ": 51,
57
+ "ቅ": 96,
58
+ "ቆ": 105,
59
+ "በ": 37,
60
+ "ቡ": 79,
61
+ "ቢ": 158,
62
+ "ባ": 64,
63
+ "ቤ": 25,
64
+ "ብ": 45,
65
+ "ቦ": 74,
66
+ "ተ": 15,
67
+ "ቱ": 50,
68
+ "ታ": 57,
69
+ "ቴ": 26,
70
+ "ት": 29,
71
+ "ቶ": 16,
72
+ "ቸ": 52,
73
+ "ቹ": 143,
74
+ "ቻ": 152,
75
+ "ቼ": 129,
76
+ "ች": 107,
77
+ "ቾ": 77,
78
+ "ነ": 18,
79
+ "ኑ": 34,
80
+ "ኒ": 150,
81
+ "ና": 99,
82
+ "ኔ": 20,
83
+ "ን": 1,
84
+ "ኖ": 123,
85
+ "አ": 8,
86
+ "ኡ": 12,
87
+ "ኢ": 95,
88
+ "ኣ": 49,
89
+ "ኤ": 2,
90
+ "እ": 44,
91
+ "ኦ": 31,
92
+ "ከ": 39,
93
+ "ኩ": 71,
94
+ "ኪ": 157,
95
+ "ካ": 75,
96
+ "ኬ": 22,
97
+ "ክ": 58,
98
+ "ኮ": 6,
99
+ "ወ": 19,
100
+ "ዉ": 101,
101
+ "ዋ": 102,
102
+ "ዌ": 139,
103
+ "ው": 111,
104
+ "ዎ": 11,
105
+ "ዘ": 87,
106
+ "ዙ": 115,
107
+ "ዚ": 172,
108
+ "ዛ": 144,
109
+ "ዜ": 73,
110
+ "ዝ": 63,
111
+ "ዞ": 103,
112
+ "ዠ": 138,
113
+ "ዡ": 159,
114
+ "ዢ": 169,
115
+ "ዣ": 161,
116
+ "ዤ": 128,
117
+ "ዥ": 147,
118
+ "ዦ": 160,
119
+ "የ": 32,
120
+ "ዩ": 135,
121
+ "ዪ": 165,
122
+ "ያ": 92,
123
+ "ዬ": 5,
124
+ "ይ": 3,
125
+ "ዮ": 56,
126
+ "ደ": 40,
127
+ "ዱ": 93,
128
+ "ዲ": 127,
129
+ "ዳ": 125,
130
+ "ዴ": 21,
131
+ "ድ": 10,
132
+ "ዶ": 28,
133
+ "ዸ": 88,
134
+ "ዹ": 109,
135
+ "ዺ": 155,
136
+ "ዻ": 145,
137
+ "ዼ": 61,
138
+ "ዽ": 84,
139
+ "ዾ": 89,
140
+ "ጁ": 171,
141
+ "ጄ": 134,
142
+ "ጅ": 166,
143
+ "ገ": 27,
144
+ "ጉ": 62,
145
+ "ጊ": 173,
146
+ "ጋ": 131,
147
+ "ጌ": 35,
148
+ "ግ": 43,
149
+ "ጎ": 70,
150
+ "ጨ": 142,
151
+ "ጩ": 98,
152
+ "ጫ": 140,
153
+ "ጬ": 118,
154
+ "ጭ": 117,
155
+ "ጮ": 122,
156
+ "ጰ": 124,
157
+ "ጱ": 164,
158
+ "ጳ": 154,
159
+ "ጴ": 137,
160
+ "ጵ": 126,
161
+ "ጶ": 151,
162
+ "ጸ": 110,
163
+ "ጹ": 106,
164
+ "ጺ": 148,
165
+ "ጻ": 149,
166
+ "ጼ": 104,
167
+ "ጽ": 90,
168
+ "ጾ": 119,
169
+ "ፈ": 42,
170
+ "ፉ": 97,
171
+ "ፊ": 156,
172
+ "ፋ": 114,
173
+ "ፌ": 66,
174
+ "ፍ": 91,
175
+ "ፎ": 136,
176
+ "–": 170
177
+ }