update readme file
Browse files
README.md
CHANGED
|
@@ -238,8 +238,8 @@ In this example, the context tokens are 'unforced', meaning the model automatica
|
|
| 238 |
>>> from datasets import load_dataset
|
| 239 |
|
| 240 |
>>> # load model and processor
|
| 241 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 242 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 243 |
>>> model.config.forced_decoder_ids = None
|
| 244 |
|
| 245 |
>>> # load dummy dataset and read audio files
|
|
@@ -266,8 +266,8 @@ The following example demonstrates French to French transcription by setting the
|
|
| 266 |
>>> from datasets import Audio, load_dataset
|
| 267 |
|
| 268 |
>>> # load model and processor
|
| 269 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 270 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 271 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
|
| 272 |
|
| 273 |
>>> # load streaming dataset and read first audio sample
|
|
@@ -296,8 +296,8 @@ Setting the task to "translate" forces the Whisper model to perform speech trans
|
|
| 296 |
>>> from datasets import Audio, load_dataset
|
| 297 |
|
| 298 |
>>> # load model and processor
|
| 299 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 300 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 301 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="translate")
|
| 302 |
|
| 303 |
>>> # load streaming dataset and read first audio sample
|
|
@@ -325,8 +325,8 @@ This code snippet shows how to evaluate Whisper Medium on [LibriSpeech test-clea
|
|
| 325 |
|
| 326 |
>>> librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
| 327 |
|
| 328 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 329 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium").to("cuda")
|
| 330 |
|
| 331 |
>>> def map_to_pred(batch):
|
| 332 |
>>> audio = batch["audio"]
|
|
@@ -363,7 +363,7 @@ can be run with batched inference. It can also be extended to predict sequence l
|
|
| 363 |
|
| 364 |
>>> pipe = pipeline(
|
| 365 |
>>> "automatic-speech-recognition",
|
| 366 |
-
>>> model="openai/whisper-medium",
|
| 367 |
>>> chunk_length_s=30,
|
| 368 |
>>> device=device,
|
| 369 |
>>> )
|
|
|
|
| 238 |
>>> from datasets import load_dataset
|
| 239 |
|
| 240 |
>>> # load model and processor
|
| 241 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 242 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 243 |
>>> model.config.forced_decoder_ids = None
|
| 244 |
|
| 245 |
>>> # load dummy dataset and read audio files
|
|
|
|
| 266 |
>>> from datasets import Audio, load_dataset
|
| 267 |
|
| 268 |
>>> # load model and processor
|
| 269 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 270 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 271 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
|
| 272 |
|
| 273 |
>>> # load streaming dataset and read first audio sample
|
|
|
|
| 296 |
>>> from datasets import Audio, load_dataset
|
| 297 |
|
| 298 |
>>> # load model and processor
|
| 299 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 300 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 301 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="translate")
|
| 302 |
|
| 303 |
>>> # load streaming dataset and read first audio sample
|
|
|
|
| 325 |
|
| 326 |
>>> librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
| 327 |
|
| 328 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 329 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium").to("cuda")
|
| 330 |
|
| 331 |
>>> def map_to_pred(batch):
|
| 332 |
>>> audio = batch["audio"]
|
|
|
|
| 363 |
|
| 364 |
>>> pipe = pipeline(
|
| 365 |
>>> "automatic-speech-recognition",
|
| 366 |
+
>>> model="Varosa/whisper-medium",
|
| 367 |
>>> chunk_length_s=30,
|
| 368 |
>>> device=device,
|
| 369 |
>>> )
|