xangcastle committed on
Commit
bee8bb6
·
1 Parent(s): f36e573

adding translation

Browse files
Files changed (2) hide show
  1. app.py +12 -23
  2. requirements.txt +74 -6
app.py CHANGED
@@ -1,33 +1,22 @@
1
- from transformers import pipeline
2
  import gradio as gr
 
 
3
  import pytube as pt
4
- import torch
5
 
6
- MODEL_NAME = "openai/whisper-medium"
7
- device = 0 if torch.cuda.is_available() else "cpu"
8
-
9
- transcriptor = pipeline(
10
- task="automatic-speech-recognition",
11
- model=MODEL_NAME,
12
- chunk_length_s=30,
13
- device=device,
14
  )
15
- # transcriptor.model.config.forced_decoder_ids = transcriptor.tokenizer.get_decoder_prompt_ids(language='en',
16
- # task="transcribe")
17
 
18
- translator = pipeline(
19
- task="automatic-speech-recognition",
20
- model=MODEL_NAME,
21
- chunk_length_s=30,
22
- device=device,
23
- )
24
- # translator.model.config.forced_decoder_ids = translator.tokenizer.get_decoder_prompt_ids(language='es',
25
- # task="translate")
26
 
27
 
28
  def transcribe(audio):
29
- transcription = transcriptor(audio)
30
- translation = translator(audio)
31
  return transcription["text"], translation["text"]
32
 
33
 
@@ -63,7 +52,7 @@ youtube_interface = gr.Interface(
63
  fn=youtube_transcribe,
64
  inputs="text",
65
  outputs=["text", "text"],
66
- title="Transcribir y traducir video de YouTube",
67
  )
68
 
69
  if __name__ == "__main__":
 
 
1
  import gradio as gr
2
+ import whisper
3
+ import numpy as np
4
  import pytube as pt
 
5
 
6
+ model = whisper.load_model("medium")
7
+ print(
8
+ f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
9
+ f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
 
 
 
 
10
  )
 
 
11
 
12
+ options = dict(language='es', beam_size=5, best_of=5)
13
+ transcribe_options = dict(task="transcribe", **options)
14
+ translate_options = dict(task="translate", **options)
 
 
 
 
 
15
 
16
 
17
  def transcribe(audio):
18
+ transcription = model.transcribe(audio, **transcribe_options)
19
+ translation = model.transcribe(audio, **translate_options)
20
  return transcription["text"], translation["text"]
21
 
22
 
 
52
  fn=youtube_transcribe,
53
  inputs="text",
54
  outputs=["text", "text"],
55
+ title="Transcribir y traducir audio",
56
  )
57
 
58
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,6 +1,74 @@
1
- transformers
2
- tensorflow
3
- torch
4
- sentencepiece
5
- numpy
6
- pytube
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.3.1
3
+ anyio==3.6.2
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ bcrypt==4.0.1
7
+ certifi==2022.9.24
8
+ cffi==1.15.1
9
+ charset-normalizer==2.1.1
10
+ click==8.1.3
11
+ contourpy==1.0.6
12
+ cryptography==38.0.4
13
+ cycler==0.11.0
14
+ fastapi==0.88.0
15
+ ffmpeg-python==0.2.0
16
+ ffmpy==0.3.0
17
+ filelock==3.8.2
18
+ fonttools==4.38.0
19
+ frozenlist==1.3.3
20
+ fsspec==2022.11.0
21
+ future==0.18.2
22
+ gradio==3.12.0
23
+ h11==0.12.0
24
+ httpcore==0.15.0
25
+ httpx==0.23.1
26
+ huggingface-hub==0.11.1
27
+ idna==3.4
28
+ Jinja2==3.1.2
29
+ kiwisolver==1.4.4
30
+ linkify-it-py==1.0.3
31
+ markdown-it-py==2.1.0
32
+ MarkupSafe==2.1.1
33
+ matplotlib==3.6.2
34
+ mdit-py-plugins==0.3.3
35
+ mdurl==0.1.2
36
+ more-itertools==9.0.0
37
+ multidict==6.0.3
38
+ numpy==1.23.5
39
+ orjson==3.8.3
40
+ packaging==21.3
41
+ pandas==1.5.2
42
+ paramiko==2.12.0
43
+ Pillow==9.3.0
44
+ pycparser==2.21
45
+ pycryptodome==3.16.0
46
+ pydantic==1.10.2
47
+ pydub==0.25.1
48
+ PyNaCl==1.5.0
49
+ pyparsing==3.0.9
50
+ python-dateutil==2.8.2
51
+ python-multipart==0.0.5
52
+ pytz==2022.6
53
+ PyYAML==6.0
54
+ regex==2022.10.31
55
+ requests==2.28.1
56
+ rfc3986==1.5.0
57
+ semantic-version==2.10.0
58
+ setuptools-rust==1.5.2
59
+ six==1.16.0
60
+ sniffio==1.3.0
61
+ starlette==0.22.0
62
+ tokenizers==0.13.2
63
+ torch==1.13.0
64
+ tqdm==4.64.1
65
+ transformers==4.25.1
66
+ typing_extensions==4.4.0
67
+ uc-micro-py==1.0.1
68
+ urllib3==1.26.13
69
+ uvicorn==0.20.0
70
+ websockets==10.4
71
+ whisper @ git+https://github.com/openai/whisper.git@fd8f80c8b880dd7c284c109ca7f03dbe978bc532
72
+ yarl==1.8.2
73
+
74
+ pytube~=12.1.0