update readme file
Browse files
README.md
CHANGED
|
@@ -238,8 +238,8 @@ In this example, the context tokens are 'unforced', meaning the model automatica
|
|
| 238 |
>>> from datasets import load_dataset
|
| 239 |
|
| 240 |
>>> # load model and processor
|
| 241 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 242 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 243 |
>>> model.config.forced_decoder_ids = None
|
| 244 |
|
| 245 |
>>> # load dummy dataset and read audio files
|
|
@@ -266,8 +266,8 @@ The following example demonstrates French to French transcription by setting the
|
|
| 266 |
>>> from datasets import Audio, load_dataset
|
| 267 |
|
| 268 |
>>> # load model and processor
|
| 269 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 270 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 271 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
|
| 272 |
|
| 273 |
>>> # load streaming dataset and read first audio sample
|
|
@@ -296,8 +296,8 @@ Setting the task to "translate" forces the Whisper model to perform speech trans
|
|
| 296 |
>>> from datasets import Audio, load_dataset
|
| 297 |
|
| 298 |
>>> # load model and processor
|
| 299 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 300 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
| 301 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="translate")
|
| 302 |
|
| 303 |
>>> # load streaming dataset and read first audio sample
|
|
@@ -325,8 +325,8 @@ This code snippet shows how to evaluate Whisper Medium on [LibriSpeech test-clea
|
|
| 325 |
|
| 326 |
>>> librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
| 327 |
|
| 328 |
-
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
|
| 329 |
-
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium").to("cuda")
|
| 330 |
|
| 331 |
>>> def map_to_pred(batch):
|
| 332 |
>>> audio = batch["audio"]
|
|
@@ -363,7 +363,7 @@ can be run with batched inference. It can also be extended to predict sequence l
|
|
| 363 |
|
| 364 |
>>> pipe = pipeline(
|
| 365 |
>>> "automatic-speech-recognition",
|
| 366 |
-
>>> model="openai/whisper-medium",
|
| 367 |
>>> chunk_length_s=30,
|
| 368 |
>>> device=device,
|
| 369 |
>>> )
|
|
|
|
| 238 |
>>> from datasets import load_dataset
|
| 239 |
|
| 240 |
>>> # load model and processor
|
| 241 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 242 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 243 |
>>> model.config.forced_decoder_ids = None
|
| 244 |
|
| 245 |
>>> # load dummy dataset and read audio files
|
|
|
|
| 266 |
>>> from datasets import Audio, load_dataset
|
| 267 |
|
| 268 |
>>> # load model and processor
|
| 269 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 270 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 271 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
|
| 272 |
|
| 273 |
>>> # load streaming dataset and read first audio sample
|
|
|
|
| 296 |
>>> from datasets import Audio, load_dataset
|
| 297 |
|
| 298 |
>>> # load model and processor
|
| 299 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 300 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium")
|
| 301 |
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="translate")
|
| 302 |
|
| 303 |
>>> # load streaming dataset and read first audio sample
|
|
|
|
| 325 |
|
| 326 |
>>> librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
| 327 |
|
| 328 |
+
>>> processor = WhisperProcessor.from_pretrained("Varosa/whisper-medium")
|
| 329 |
+
>>> model = WhisperForConditionalGeneration.from_pretrained("Varosa/whisper-medium").to("cuda")
|
| 330 |
|
| 331 |
>>> def map_to_pred(batch):
|
| 332 |
>>> audio = batch["audio"]
|
|
|
|
| 363 |
|
| 364 |
>>> pipe = pipeline(
|
| 365 |
>>> "automatic-speech-recognition",
|
| 366 |
+
>>> model="Varosa/whisper-medium",
|
| 367 |
>>> chunk_length_s=30,
|
| 368 |
>>> device=device,
|
| 369 |
>>> )
|