xangcastle committed on
Commit
7630a02
·
1 Parent(s): eb3f2cb
Files changed (1) hide show
  1. app.py +21 -6
app.py CHANGED
@@ -1,18 +1,33 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
  import pytube as pt
 
4
 
5
- model = pipeline(model="openai/whisper-medium") # change to "your-username/the-name-you-picked"
 
6
 
 
 
 
 
 
 
 
 
7
 
8
- options = dict(language='es', beam_size=5, best_of=5)
9
- transcribe_options = dict(task="transcribe", **options)
10
- translate_options = dict(task="translate", **options)
 
 
 
 
 
11
 
12
 
13
  def transcribe(audio):
14
- transcription = model.transcribe(audio, **transcribe_options)
15
- translation = model.transcribe(audio, **translate_options)
16
  return transcription["text"], translation["text"]
17
 
18
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
  import pytube as pt
4
+ import torch
5
 
6
MODEL_NAME = "openai/whisper-medium"
# transformers pipelines accept a CUDA device index or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Pipeline used for plain transcription. chunk_length_s=30 enables Whisper's
# long-form chunked inference on audio longer than 30 seconds.
transcribe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
transcribe.model.config.forced_decoder_ids = transcribe.tokenizer.get_decoder_prompt_ids(
    language='en', task="transcribe"
)

# Second pipeline, configured to translate the (Spanish) audio.
translate = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# BUG FIX: the original line assigned the translation prompt back onto the
# `transcribe` pipeline (copy-paste error), which clobbered the transcription
# settings and left `translate` completely unconfigured. Configure the
# `translate` pipeline instead.
translate.model.config.forced_decoder_ids = translate.tokenizer.get_decoder_prompt_ids(
    language='es', task="translate"
)
26
 
27
 
28
# BUG FIX: `def transcribe` below shadows the module-level `transcribe`
# pipeline, so the original body's call `transcribe(audio)` resolved to the
# function itself and recursed infinitely. Keep a private handle to the
# pipeline before the name is shadowed.
_transcribe_pipe = transcribe


def transcribe(audio):
    """Run both Whisper pipelines on `audio`.

    Parameters:
        audio: audio input accepted by the ASR pipelines (e.g. a file path).

    Returns:
        Tuple of (transcription_text, translation_text) — the "text" field of
        each pipeline's output dict, as consumed by the Gradio interface.
    """
    transcription = _transcribe_pipe(audio)
    translation = translate(audio)
    return transcription["text"], translation["text"]
32
 
33