lucasconsult committed on
Commit
17060c3
·
verified ·
1 Parent(s): 5e7f0a5

Upload whisper-tiny-en

Browse files
Files changed (6) hide show
  1. .gitattributes +0 -1
  2. README.md +42 -0
  3. config.json +232 -0
  4. model.bin +3 -0
  5. tokenizer.json +0 -0
  6. vocabulary.txt +0 -0
.gitattributes CHANGED
@@ -25,7 +25,6 @@
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
 
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
28
  *.tflite filter=lfs diff=lfs merge=lfs -text
29
  *.tgz filter=lfs diff=lfs merge=lfs -text
30
  *.wasm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - audio
6
+ - automatic-speech-recognition
7
+ license: mit
8
+ library_name: ctranslate2
9
+ ---
10
+
11
+ # Whisper tiny.en model for CTranslate2
12
+
13
+ This repository contains the conversion of [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en) to the [CTranslate2](https://github.com/OpenNMT/CTranslate2) model format.
14
+
15
+ This model can be used in CTranslate2 or projects based on CTranslate2 such as [faster-whisper](https://github.com/systran/faster-whisper).
16
+
17
+ ## Example
18
+
19
+ ```python
20
+ from faster_whisper import WhisperModel
21
+
22
+ model = WhisperModel("tiny.en")
23
+
24
+ segments, info = model.transcribe("audio.mp3")
25
+ for segment in segments:
26
+     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
27
+ ```
28
+
29
+ ## Conversion details
30
+
31
+ The original model was converted with the following command:
32
+
33
+ ```
34
+ ct2-transformers-converter --model openai/whisper-tiny.en --output_dir faster-whisper-tiny.en \
35
+ --copy_files tokenizer.json --quantization float16
36
+ ```
37
+
38
+ Note that the model weights are saved in FP16. This type can be changed when the model is loaded using the [`compute_type` option in CTranslate2](https://opennmt.net/CTranslate2/quantization.html).
39
+
40
+ ## More information
41
+
42
+ **For more information about the original model, see its [model card](https://huggingface.co/openai/whisper-tiny.en).**
config.json ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 1,
5
+ 0
6
+ ],
7
+ [
8
+ 2,
9
+ 0
10
+ ],
11
+ [
12
+ 2,
13
+ 5
14
+ ],
15
+ [
16
+ 3,
17
+ 0
18
+ ],
19
+ [
20
+ 3,
21
+ 1
22
+ ],
23
+ [
24
+ 3,
25
+ 2
26
+ ],
27
+ [
28
+ 3,
29
+ 3
30
+ ],
31
+ [
32
+ 3,
33
+ 4
34
+ ]
35
+ ],
36
+ "lang_ids": [
37
+ 50259,
38
+ 50260,
39
+ 50261,
40
+ 50262,
41
+ 50263,
42
+ 50264,
43
+ 50265,
44
+ 50266,
45
+ 50267,
46
+ 50268,
47
+ 50269,
48
+ 50270,
49
+ 50271,
50
+ 50272,
51
+ 50273,
52
+ 50274,
53
+ 50275,
54
+ 50276,
55
+ 50277,
56
+ 50278,
57
+ 50279,
58
+ 50280,
59
+ 50281,
60
+ 50282,
61
+ 50283,
62
+ 50284,
63
+ 50285,
64
+ 50286,
65
+ 50287,
66
+ 50288,
67
+ 50289,
68
+ 50290,
69
+ 50291,
70
+ 50292,
71
+ 50293,
72
+ 50294,
73
+ 50295,
74
+ 50296,
75
+ 50297,
76
+ 50298,
77
+ 50299,
78
+ 50300,
79
+ 50301,
80
+ 50302,
81
+ 50303,
82
+ 50304,
83
+ 50305,
84
+ 50306,
85
+ 50307,
86
+ 50308,
87
+ 50309,
88
+ 50310,
89
+ 50311,
90
+ 50312,
91
+ 50313,
92
+ 50314,
93
+ 50315,
94
+ 50316,
95
+ 50317,
96
+ 50318,
97
+ 50319,
98
+ 50320,
99
+ 50321,
100
+ 50322,
101
+ 50323,
102
+ 50324,
103
+ 50325,
104
+ 50326,
105
+ 50327,
106
+ 50328,
107
+ 50329,
108
+ 50330,
109
+ 50331,
110
+ 50332,
111
+ 50333,
112
+ 50334,
113
+ 50335,
114
+ 50336,
115
+ 50337,
116
+ 50338,
117
+ 50339,
118
+ 50340,
119
+ 50341,
120
+ 50342,
121
+ 50343,
122
+ 50344,
123
+ 50345,
124
+ 50346,
125
+ 50347,
126
+ 50348,
127
+ 50349,
128
+ 50350,
129
+ 50351,
130
+ 50352,
131
+ 50353,
132
+ 50354,
133
+ 50355,
134
+ 50356
135
+ ],
136
+ "suppress_ids": [
137
+ 1,
138
+ 2,
139
+ 7,
140
+ 8,
141
+ 9,
142
+ 10,
143
+ 14,
144
+ 25,
145
+ 26,
146
+ 27,
147
+ 28,
148
+ 29,
149
+ 31,
150
+ 58,
151
+ 59,
152
+ 60,
153
+ 61,
154
+ 62,
155
+ 63,
156
+ 90,
157
+ 91,
158
+ 92,
159
+ 93,
160
+ 357,
161
+ 366,
162
+ 438,
163
+ 532,
164
+ 685,
165
+ 705,
166
+ 796,
167
+ 930,
168
+ 1058,
169
+ 1220,
170
+ 1267,
171
+ 1279,
172
+ 1303,
173
+ 1343,
174
+ 1377,
175
+ 1391,
176
+ 1635,
177
+ 1782,
178
+ 1875,
179
+ 2162,
180
+ 2361,
181
+ 2488,
182
+ 3467,
183
+ 4008,
184
+ 4211,
185
+ 4600,
186
+ 4808,
187
+ 5299,
188
+ 5855,
189
+ 6329,
190
+ 7203,
191
+ 9609,
192
+ 9959,
193
+ 10563,
194
+ 10786,
195
+ 11420,
196
+ 11709,
197
+ 11907,
198
+ 13163,
199
+ 13697,
200
+ 13700,
201
+ 14808,
202
+ 15306,
203
+ 16410,
204
+ 16791,
205
+ 17992,
206
+ 19203,
207
+ 19510,
208
+ 20724,
209
+ 22305,
210
+ 22935,
211
+ 27007,
212
+ 30109,
213
+ 30420,
214
+ 33409,
215
+ 34949,
216
+ 40283,
217
+ 40493,
218
+ 40549,
219
+ 47282,
220
+ 49146,
221
+ 50257,
222
+ 50357,
223
+ 50358,
224
+ 50359,
225
+ 50360,
226
+ 50361
227
+ ],
228
+ "suppress_ids_begin": [
229
+ 220,
230
+ 50256
231
+ ]
232
+ }
model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a5afae06a4db91c975c9a9d78be5cc110ee4ea022ad57d55492e4550e936b2a
3
+ size 75537502
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff