lucasconsult committed on
Commit
d249cd5
·
0 Parent(s):

Duplicate from lucasconsult/faster-whisper-small.en

Browse files
Files changed (6) hide show
  1. .gitattributes +34 -0
  2. README.md +42 -0
  3. config.json +276 -0
  4. model.bin +3 -0
  5. tokenizer.json +0 -0
  6. vocabulary.txt +0 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - audio
6
+ - automatic-speech-recognition
7
+ license: mit
8
+ library_name: ctranslate2
9
+ ---
10
+
11
+ # Whisper small.en model for CTranslate2
12
+
13
+ This repository contains the conversion of [openai/whisper-small.en](https://huggingface.co/openai/whisper-small.en) to the [CTranslate2](https://github.com/OpenNMT/CTranslate2) model format.
14
+
15
+ This model can be used in CTranslate2 or projects based on CTranslate2 such as [faster-whisper](https://github.com/systran/faster-whisper).
16
+
17
+ ## Example
18
+
19
+ ```python
20
+ from faster_whisper import WhisperModel
21
+
22
+ model = WhisperModel("small.en")
23
+
24
+ segments, info = model.transcribe("audio.mp3")
25
+ for segment in segments:
26
+     print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
27
+ ```
28
+
29
+ ## Conversion details
30
+
31
+ The original model was converted with the following command:
32
+
33
+ ```
34
+ ct2-transformers-converter --model openai/whisper-small.en --output_dir faster-whisper-small.en \
35
+ --copy_files tokenizer.json --quantization float16
36
+ ```
37
+
38
+ Note that the model weights are saved in FP16. This type can be changed when the model is loaded using the [`compute_type` option in CTranslate2](https://opennmt.net/CTranslate2/quantization.html).
39
+
40
+ ## More information
41
+
42
+ **For more information about the original model, see its [model card](https://huggingface.co/openai/whisper-small.en).**
config.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 6,
5
+ 6
6
+ ],
7
+ [
8
+ 7,
9
+ 0
10
+ ],
11
+ [
12
+ 7,
13
+ 3
14
+ ],
15
+ [
16
+ 7,
17
+ 8
18
+ ],
19
+ [
20
+ 8,
21
+ 2
22
+ ],
23
+ [
24
+ 8,
25
+ 5
26
+ ],
27
+ [
28
+ 8,
29
+ 7
30
+ ],
31
+ [
32
+ 9,
33
+ 0
34
+ ],
35
+ [
36
+ 9,
37
+ 4
38
+ ],
39
+ [
40
+ 9,
41
+ 8
42
+ ],
43
+ [
44
+ 9,
45
+ 10
46
+ ],
47
+ [
48
+ 10,
49
+ 0
50
+ ],
51
+ [
52
+ 10,
53
+ 1
54
+ ],
55
+ [
56
+ 10,
57
+ 2
58
+ ],
59
+ [
60
+ 10,
61
+ 3
62
+ ],
63
+ [
64
+ 10,
65
+ 6
66
+ ],
67
+ [
68
+ 10,
69
+ 11
70
+ ],
71
+ [
72
+ 11,
73
+ 2
74
+ ],
75
+ [
76
+ 11,
77
+ 4
78
+ ]
79
+ ],
80
+ "lang_ids": [
81
+ 50259,
82
+ 50260,
83
+ 50261,
84
+ 50262,
85
+ 50263,
86
+ 50264,
87
+ 50265,
88
+ 50266,
89
+ 50267,
90
+ 50268,
91
+ 50269,
92
+ 50270,
93
+ 50271,
94
+ 50272,
95
+ 50273,
96
+ 50274,
97
+ 50275,
98
+ 50276,
99
+ 50277,
100
+ 50278,
101
+ 50279,
102
+ 50280,
103
+ 50281,
104
+ 50282,
105
+ 50283,
106
+ 50284,
107
+ 50285,
108
+ 50286,
109
+ 50287,
110
+ 50288,
111
+ 50289,
112
+ 50290,
113
+ 50291,
114
+ 50292,
115
+ 50293,
116
+ 50294,
117
+ 50295,
118
+ 50296,
119
+ 50297,
120
+ 50298,
121
+ 50299,
122
+ 50300,
123
+ 50301,
124
+ 50302,
125
+ 50303,
126
+ 50304,
127
+ 50305,
128
+ 50306,
129
+ 50307,
130
+ 50308,
131
+ 50309,
132
+ 50310,
133
+ 50311,
134
+ 50312,
135
+ 50313,
136
+ 50314,
137
+ 50315,
138
+ 50316,
139
+ 50317,
140
+ 50318,
141
+ 50319,
142
+ 50320,
143
+ 50321,
144
+ 50322,
145
+ 50323,
146
+ 50324,
147
+ 50325,
148
+ 50326,
149
+ 50327,
150
+ 50328,
151
+ 50329,
152
+ 50330,
153
+ 50331,
154
+ 50332,
155
+ 50333,
156
+ 50334,
157
+ 50335,
158
+ 50336,
159
+ 50337,
160
+ 50338,
161
+ 50339,
162
+ 50340,
163
+ 50341,
164
+ 50342,
165
+ 50343,
166
+ 50344,
167
+ 50345,
168
+ 50346,
169
+ 50347,
170
+ 50348,
171
+ 50349,
172
+ 50350,
173
+ 50351,
174
+ 50352,
175
+ 50353,
176
+ 50354,
177
+ 50355,
178
+ 50356
179
+ ],
180
+ "suppress_ids": [
181
+ 1,
182
+ 2,
183
+ 7,
184
+ 8,
185
+ 9,
186
+ 10,
187
+ 14,
188
+ 25,
189
+ 26,
190
+ 27,
191
+ 28,
192
+ 29,
193
+ 31,
194
+ 58,
195
+ 59,
196
+ 60,
197
+ 61,
198
+ 62,
199
+ 63,
200
+ 90,
201
+ 91,
202
+ 92,
203
+ 93,
204
+ 357,
205
+ 366,
206
+ 438,
207
+ 532,
208
+ 685,
209
+ 705,
210
+ 796,
211
+ 930,
212
+ 1058,
213
+ 1220,
214
+ 1267,
215
+ 1279,
216
+ 1303,
217
+ 1343,
218
+ 1377,
219
+ 1391,
220
+ 1635,
221
+ 1782,
222
+ 1875,
223
+ 2162,
224
+ 2361,
225
+ 2488,
226
+ 3467,
227
+ 4008,
228
+ 4211,
229
+ 4600,
230
+ 4808,
231
+ 5299,
232
+ 5855,
233
+ 6329,
234
+ 7203,
235
+ 9609,
236
+ 9959,
237
+ 10563,
238
+ 10786,
239
+ 11420,
240
+ 11709,
241
+ 11907,
242
+ 13163,
243
+ 13697,
244
+ 13700,
245
+ 14808,
246
+ 15306,
247
+ 16410,
248
+ 16791,
249
+ 17992,
250
+ 19203,
251
+ 19510,
252
+ 20724,
253
+ 22305,
254
+ 22935,
255
+ 27007,
256
+ 30109,
257
+ 30420,
258
+ 33409,
259
+ 34949,
260
+ 40283,
261
+ 40493,
262
+ 40549,
263
+ 47282,
264
+ 49146,
265
+ 50257,
266
+ 50357,
267
+ 50358,
268
+ 50359,
269
+ 50360,
270
+ 50361
271
+ ],
272
+ "suppress_ids_begin": [
273
+ 220,
274
+ 50256
275
+ ]
276
+ }
model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b2a45b05ee59acb4a5341b33ee35e041395d378d418a18acfe4c9e768ee37a
3
+ size 483545366
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff