sanchit-gandhi committed on
Commit
3e96147
·
1 Parent(s): 56a1632

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -0
  2. tokenizer_config.json +12 -0
  3. vocab.json +187 -0
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "pad_token": "|",
3
+ "unk_token": "<unk>"
4
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_blank": true,
3
+ "clean_up_tokenization_spaces": true,
4
+ "is_uroman": false,
5
+ "language": "ktb",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "normalize": true,
8
+ "pad_token": "|",
9
+ "phonemize": false,
10
+ "tokenizer_class": "VitsTokenizer",
11
+ "unk_token": "<unk>"
12
+ }
vocab.json ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ " ": 184,
3
+ "-": 131,
4
+ "|": 0,
5
+ "ሀ": 32,
6
+ "ሁ": 30,
7
+ "ሂ": 124,
8
+ "ሃ": 36,
9
+ "ሄ": 80,
10
+ "ህ": 17,
11
+ "ሆ": 39,
12
+ "ለ": 28,
13
+ "ሉ": 91,
14
+ "ሊ": 108,
15
+ "ላ": 69,
16
+ "ሌ": 66,
17
+ "ል": 35,
18
+ "ሎ": 87,
19
+ "ሏ": 168,
20
+ "መ": 4,
21
+ "ሙ": 70,
22
+ "ሚ": 98,
23
+ "ማ": 38,
24
+ "ሜ": 53,
25
+ "ም": 9,
26
+ "ሞ": 42,
27
+ "ሟ": 159,
28
+ "ረ": 27,
29
+ "ሩ": 56,
30
+ "ሪ": 89,
31
+ "ራ": 75,
32
+ "ሬ": 57,
33
+ "ር": 15,
34
+ "ሮ": 67,
35
+ "ሯ": 170,
36
+ "ሰ": 13,
37
+ "ሱ": 25,
38
+ "ሲ": 81,
39
+ "ሳ": 60,
40
+ "ሴ": 48,
41
+ "ስ": 6,
42
+ "ሶ": 50,
43
+ "ሷ": 172,
44
+ "ሸ": 107,
45
+ "ሹ": 147,
46
+ "ሺ": 152,
47
+ "ሻ": 133,
48
+ "ሼ": 136,
49
+ "ሽ": 82,
50
+ "ሾ": 109,
51
+ "ሿ": 174,
52
+ "ቀ": 33,
53
+ "ቁ": 128,
54
+ "ቂ": 150,
55
+ "ቃ": 99,
56
+ "ቄ": 92,
57
+ "ቅ": 83,
58
+ "ቆ": 74,
59
+ "ቋ": 180,
60
+ "በ": 12,
61
+ "ቡ": 79,
62
+ "ቢ": 110,
63
+ "ባ": 64,
64
+ "ቤ": 85,
65
+ "ብ": 59,
66
+ "ቦ": 86,
67
+ "ቧ": 161,
68
+ "ተ": 5,
69
+ "ቱ": 63,
70
+ "ቲ": 95,
71
+ "ታ": 49,
72
+ "ቴ": 51,
73
+ "ት": 8,
74
+ "ቶ": 34,
75
+ "ቷ": 166,
76
+ "ቸ": 134,
77
+ "ቹ": 45,
78
+ "ቺ": 115,
79
+ "ቻ": 157,
80
+ "ቼ": 129,
81
+ "ች": 18,
82
+ "ቾ": 96,
83
+ "ቿ": 156,
84
+ "ነ": 20,
85
+ "ኑ": 21,
86
+ "ኒ": 19,
87
+ "ና": 46,
88
+ "ኔ": 11,
89
+ "ን": 1,
90
+ "ኖ": 16,
91
+ "ኗ": 148,
92
+ "ኘ": 173,
93
+ "ኙ": 178,
94
+ "አ": 2,
95
+ "ኡ": 55,
96
+ "ኢ": 62,
97
+ "ኣ": 47,
98
+ "ኤ": 41,
99
+ "እ": 3,
100
+ "ኦ": 31,
101
+ "ከ": 14,
102
+ "ኩ": 23,
103
+ "ኪ": 102,
104
+ "ካ": 90,
105
+ "ኬ": 40,
106
+ "ክ": 29,
107
+ "ኮ": 44,
108
+ "ኳ": 181,
109
+ "ወ": 111,
110
+ "ዉ": 104,
111
+ "ዊ": 132,
112
+ "ዋ": 88,
113
+ "ዌ": 164,
114
+ "ው": 112,
115
+ "ዎ": 52,
116
+ "ዕ": 10,
117
+ "ዘ": 61,
118
+ "ዙ": 154,
119
+ "ዚ": 162,
120
+ "ዛ": 135,
121
+ "ዜ": 126,
122
+ "ዝ": 140,
123
+ "ዞ": 151,
124
+ "ዟ": 176,
125
+ "ዦ": 177,
126
+ "የ": 71,
127
+ "ዩ": 153,
128
+ "ዪ": 143,
129
+ "ያ": 93,
130
+ "ዬ": 24,
131
+ "ይ": 58,
132
+ "ዮ": 37,
133
+ "ደ": 22,
134
+ "ዱ": 101,
135
+ "ዲ": 106,
136
+ "ዳ": 43,
137
+ "ዴ": 138,
138
+ "ድ": 73,
139
+ "ዶ": 68,
140
+ "ዷ": 175,
141
+ "ጀ": 113,
142
+ "ጁ": 160,
143
+ "ጂ": 165,
144
+ "ጃ": 84,
145
+ "ጄ": 127,
146
+ "ጅ": 100,
147
+ "ጆ": 139,
148
+ "ገ": 7,
149
+ "ጉ": 65,
150
+ "ጊ": 125,
151
+ "ጋ": 78,
152
+ "ጌ": 118,
153
+ "ግ": 72,
154
+ "ጎ": 94,
155
+ "ጠ": 26,
156
+ "ጡ": 54,
157
+ "ጢ": 116,
158
+ "ጣ": 105,
159
+ "ጤ": 123,
160
+ "ጥ": 103,
161
+ "ጦ": 117,
162
+ "ጧ": 169,
163
+ "ጨ": 114,
164
+ "ጩ": 121,
165
+ "ጪ": 155,
166
+ "ጫ": 142,
167
+ "ጬ": 149,
168
+ "ጭ": 122,
169
+ "ጮ": 119,
170
+ "ጯ": 182,
171
+ "ጰ": 145,
172
+ "ጱ": 171,
173
+ "ጲ": 167,
174
+ "ጳ": 137,
175
+ "ጴ": 146,
176
+ "ጵ": 130,
177
+ "ጶ": 163,
178
+ "ጷ": 179,
179
+ "ፈ": 76,
180
+ "ፉ": 77,
181
+ "ፊ": 158,
182
+ "ፋ": 144,
183
+ "ፌ": 141,
184
+ "ፍ": 97,
185
+ "ፎ": 120,
186
+ "ፏ": 183
187
+ }