xangcastle committed on
Commit
3355b10
·
1 Parent(s): 43f52c4

more options

Browse files
Files changed (6) hide show
  1. app.py +55 -12
  2. images/banan.jpeg +0 -0
  3. images/hung.jpeg +0 -0
  4. images/katt.jpeg +0 -0
  5. images/melon.jpeg +0 -0
  6. requirements.txt +2 -1
app.py CHANGED
@@ -1,19 +1,62 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
- from numpy import random
4
- from PIL import Image
5
- pipe = pipeline(model="openai/whisper-medium") # change to "your-username/the-name-you-picked"
 
 
 
 
 
 
 
 
 
 
6
 
7
  def transcribe(audio):
8
- text = pipe(audio)["text"]
9
- return text
10
-
11
- iface = gr.Interface(
12
- fn=transcribe,
13
- inputs=[gr.Audio(source="microphone", type="filepath"),],
14
- outputs="text",
15
- title="Make transcription of audio",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- iface.launch()
 
 
 
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import numpy as np
4
+ import pytube as pt
5
+
6
+ model = pipeline(model="openai/whisper-medium") # change to "your-username/the-name-you-picked"
7
+ print(
8
+ f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
9
+ f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
10
+ )
11
+
12
+ options = dict(language='es', beam_size=5, best_of=5)
13
+ transcribe_options = dict(task="transcribe", **options)
14
+ translate_options = dict(task="translate", **options)
15
+
16
 
17
  def transcribe(audio):
18
+ transcription = model.transcribe(audio, **transcribe_options)
19
+ translation = model.transcribe(audio, **translate_options)
20
+ return transcription["text"], translation["text"]
21
+
22
+
23
+ def youtube_transcribe(url):
24
+ yt = pt.YouTube(url)
25
+ stream = yt.streams.filter(only_audio=True).first()
26
+ audio = stream.download()
27
+ return transcribe(audio)
28
+
29
+
30
+ mic_interface = gr.Interface(
31
+ fn=transcribe,
32
+ inputs=gr.Audio(source="microphone", type="filepath"),
33
+ outputs=["text", "text"],
34
+ title="Transcribir y traducir audio",
35
+ )
36
+
37
+ audio_interface = gr.Interface(
38
+ fn=transcribe,
39
+ inputs="audio",
40
+ outputs=["text", "text"],
41
+ title="Transcribir y traducir audio",
42
  )
43
 
44
+ video_interface = gr.Interface(
45
+ fn=transcribe,
46
+ inputs="video",
47
+ outputs=["text", "text"],
48
+ title="Transcribir y traducir audio",
49
+ )
50
+
51
+ youtube_interface = gr.Interface(
52
+ fn=youtube_transcribe,
53
+ inputs="text",
54
+ outputs=["text", "text"],
55
+ title="Transcribir y traducir video de YouTube",
56
+ )
57
 
58
+ if __name__ == "__main__":
59
+ gr.TabbedInterface(
60
+ [mic_interface, audio_interface, video_interface, youtube_interface],
61
+ ["Micrófono", "Audio", "Video", "YouTube"],
62
+ ).launch()
images/banan.jpeg DELETED
Binary file (58.9 kB)
 
images/hung.jpeg DELETED
Binary file (18.7 kB)
 
images/katt.jpeg DELETED
Binary file (21.8 kB)
 
images/melon.jpeg DELETED
Binary file (41 kB)
 
requirements.txt CHANGED
@@ -2,4 +2,5 @@ transformers
2
  tensorflow
3
  torch
4
  sentencepiece
5
- numpy
 
 
2
  tensorflow
3
  torch
4
  sentencepiece
5
+ numpy
6
+ pytube