Joshua Lochner committed on
Commit
3979408
·
1 Parent(s): 0c54572

Add support for distil-whisper models

Browse files
src/components/AudioManager.tsx CHANGED
@@ -368,10 +368,15 @@ function SettingsModal(props: {
368
  const names = Object.values(LANGUAGES).map(titleCase);
369
 
370
  const models = {
371
- tiny: [41, 152],
372
- base: [77, 291],
373
- small: [249],
374
- medium: [776],
 
 
 
 
 
375
  };
376
  return (
377
  <Modal
@@ -394,9 +399,14 @@ function SettingsModal(props: {
394
  // @ts-ignore
395
  models[key].length == 2,
396
  )
 
 
 
 
 
397
  .map((key) => (
398
- <option key={key} value={key}>{`whisper-${key}${
399
- props.transcriber.multilingual ? "" : ".en"
400
  } (${
401
  // @ts-ignore
402
  models[key][
 
368
  const names = Object.values(LANGUAGES).map(titleCase);
369
 
370
  const models = {
371
+ // Original checkpoints
372
+ 'Xenova/whisper-tiny': [41, 152],
373
+ 'Xenova/whisper-base': [77, 291],
374
+ 'Xenova/whisper-small': [249],
375
+ 'Xenova/whisper-medium': [776],
376
+
377
+ // Distil Whisper (English-only)
378
+ 'distil-whisper/distil-medium.en': [402],
379
+ 'distil-whisper/distil-large-v2': [767],
380
  };
381
  return (
382
  <Modal
 
399
  // @ts-ignore
400
  models[key].length == 2,
401
  )
402
+ .filter(
403
+ (key) => (
404
+ !props.transcriber.multilingual || !key.startsWith('distil-whisper/')
405
+ )
406
+ )
407
  .map((key) => (
408
+ <option key={key} value={key}>{`${key}${
409
+ (props.transcriber.multilingual || key.startsWith('distil-whisper/')) ? "" : ".en"
410
  } (${
411
  // @ts-ignore
412
  models[key][
src/utils/Constants.ts CHANGED
@@ -24,9 +24,9 @@ const isMobileOrTablet = mobileTabletCheck();
24
  export default {
25
  SAMPLING_RATE: 16000,
26
  DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${
27
- isMobileOrTablet ? "jfk" : "ted_60"
28
  }.wav`,
29
- DEFAULT_MODEL: "tiny",
30
  DEFAULT_SUBTASK: "transcribe",
31
  DEFAULT_LANGUAGE: "english",
32
  DEFAULT_QUANTIZED: isMobileOrTablet,
 
24
  export default {
25
  SAMPLING_RATE: 16000,
26
  DEFAULT_AUDIO_URL: `https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/${
27
+ isMobileOrTablet ? "jfk" : "ted_60_16k"
28
  }.wav`,
29
+ DEFAULT_MODEL: "Xenova/whisper-tiny",
30
  DEFAULT_SUBTASK: "transcribe",
31
  DEFAULT_LANGUAGE: "english",
32
  DEFAULT_QUANTIZED: isMobileOrTablet,
src/worker.js CHANGED
@@ -67,9 +67,13 @@ const transcribe = async (
67
  subtask,
68
  language,
69
  ) => {
70
- // TODO use subtask and language
71
 
72
- const modelName = `Xenova/whisper-${model}${multilingual ? "" : ".en"}`;
 
 
 
 
 
73
 
74
  const p = AutomaticSpeechRecognitionPipelineFactory;
75
  if (p.model !== modelName || p.quantized !== quantized) {
@@ -148,8 +152,8 @@ const transcribe = async (
148
  do_sample: false,
149
 
150
  // Sliding window
151
- chunk_length_s: 30,
152
- stride_length_s: 5,
153
 
154
  // Language and task
155
  language: language,
 
67
  subtask,
68
  language,
69
  ) => {
 
70
 
71
+ const isDistilWhisper = model.startsWith("distil-whisper/");
72
+
73
+ let modelName = model;
74
+ if (!isDistilWhisper && !multilingual) {
75
+ modelName += ".en"
76
+ }
77
 
78
  const p = AutomaticSpeechRecognitionPipelineFactory;
79
  if (p.model !== modelName || p.quantized !== quantized) {
 
152
  do_sample: false,
153
 
154
  // Sliding window
155
+ chunk_length_s: isDistilWhisper ? 20 : 30,
156
+ stride_length_s: isDistilWhisper ? 3 : 5,
157
 
158
  // Language and task
159
  language: language,