Spaces:
Sleeping
Sleeping
Joshua Lochner committed on
Commit ·
3979408
1
Parent(s): 0c54572
Add support for distil-whisper models
Browse files
- src/components/AudioManager.tsx +16 -6
- src/utils/Constants.ts +2 -2
- src/worker.js +8 -4
src/components/AudioManager.tsx
CHANGED
|
@@ -368,10 +368,15 @@ function SettingsModal(props: {
|
|
| 368 |
const names = Object.values(LANGUAGES).map(titleCase);
|
| 369 |
|
| 370 |
const models = {
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
};
|
| 376 |
return (
|
| 377 |
<Modal
|
|
@@ -394,9 +399,14 @@ function SettingsModal(props: {
|
|
| 394 |
// @ts-ignore
|
| 395 |
models[key].length == 2,
|
| 396 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
.map((key) => (
|
| 398 |
-
<option key={key} value={key}>{`
|
| 399 |
-
props.transcriber.multilingual ? "" : ".en"
|
| 400 |
} (${
|
| 401 |
// @ts-ignore
|
| 402 |
models[key][
|
|
|
|
| 368 |
const names = Object.values(LANGUAGES).map(titleCase);
|
| 369 |
|
| 370 |
const models = {
|
| 371 |
+
// Original checkpoints
|
| 372 |
+
'Xenova/whisper-tiny': [41, 152],
|
| 373 |
+
'Xenova/whisper-base': [77, 291],
|
| 374 |
+
'Xenova/whisper-small': [249],
|
| 375 |
+
'Xenova/whisper-medium': [776],
|
| 376 |
+
|
| 377 |
+
// Distil Whisper (English-only)
|
| 378 |
+
'distil-whisper/distil-medium.en': [402],
|
| 379 |
+
'distil-whisper/distil-large-v2': [767],
|
| 380 |
};
|
| 381 |
return (
|
| 382 |
<Modal
|
|
|
|
| 399 |
// @ts-ignore
|
| 400 |
models[key].length == 2,
|
| 401 |
)
|
| 402 |
+
.filter(
|
| 403 |
+
(key) => (
|
| 404 |
+
!props.transcriber.multilingual || !key.startsWith('distil-whisper/')
|
| 405 |
+
)
|
| 406 |
+
)
|
| 407 |
.map((key) => (
|
| 408 |
+
<option key={key} value={key}>{`${key}${
|
| 409 |
+
(props.transcriber.multilingual || key.startsWith('distil-whisper/')) ? "" : ".en"
|
| 410 |
} (${
|
| 411 |
// @ts-ignore
|
| 412 |
models[key][
|
src/utils/Constants.ts
CHANGED
|
@@ -24,9 +24,9 @@ const isMobileOrTablet = mobileTabletCheck();
|
|
| 24 |
export default {
|
| 25 |
SAMPLING_RATE: 16000,
|
| 26 |
DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${
|
| 27 |
-
isMobileOrTablet ? "jfk" : "
|
| 28 |
}.wav`,
|
| 29 |
-
DEFAULT_MODEL: "tiny",
|
| 30 |
DEFAULT_SUBTASK: "transcribe",
|
| 31 |
DEFAULT_LANGUAGE: "english",
|
| 32 |
DEFAULT_QUANTIZED: isMobileOrTablet,
|
|
|
|
| 24 |
export default {
|
| 25 |
SAMPLING_RATE: 16000,
|
| 26 |
DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${
|
| 27 |
+
isMobileOrTablet ? "jfk" : "ted_60_16k"
|
| 28 |
}.wav`,
|
| 29 |
+
DEFAULT_MODEL: "Xenova/whisper-tiny",
|
| 30 |
DEFAULT_SUBTASK: "transcribe",
|
| 31 |
DEFAULT_LANGUAGE: "english",
|
| 32 |
DEFAULT_QUANTIZED: isMobileOrTablet,
|
src/worker.js
CHANGED
|
@@ -67,9 +67,13 @@ const transcribe = async (
|
|
| 67 |
subtask,
|
| 68 |
language,
|
| 69 |
) => {
|
| 70 |
-
// TODO use subtask and language
|
| 71 |
|
| 72 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
const p = AutomaticSpeechRecognitionPipelineFactory;
|
| 75 |
if (p.model !== modelName || p.quantized !== quantized) {
|
|
@@ -148,8 +152,8 @@ const transcribe = async (
|
|
| 148 |
do_sample: false,
|
| 149 |
|
| 150 |
// Sliding window
|
| 151 |
-
chunk_length_s: 30,
|
| 152 |
-
stride_length_s: 5,
|
| 153 |
|
| 154 |
// Language and task
|
| 155 |
language: language,
|
|
|
|
| 67 |
subtask,
|
| 68 |
language,
|
| 69 |
) => {
|
|
|
|
| 70 |
|
| 71 |
+
const isDistilWhisper = model.startsWith("distil-whisper/");
|
| 72 |
+
|
| 73 |
+
let modelName = model;
|
| 74 |
+
if (!isDistilWhisper && !multilingual) {
|
| 75 |
+
modelName += ".en"
|
| 76 |
+
}
|
| 77 |
|
| 78 |
const p = AutomaticSpeechRecognitionPipelineFactory;
|
| 79 |
if (p.model !== modelName || p.quantized !== quantized) {
|
|
|
|
| 152 |
do_sample: false,
|
| 153 |
|
| 154 |
// Sliding window
|
| 155 |
+
chunk_length_s: isDistilWhisper ? 20 : 30,
|
| 156 |
+
stride_length_s: isDistilWhisper ? 3 : 5,
|
| 157 |
|
| 158 |
// Language and task
|
| 159 |
language: language,
|