Comment out everything to try building on Hugging Face step by step
Browse files
- requirements.txt +19 -19
- subtify.py +63 -61
requirements.txt
CHANGED
|
@@ -2,26 +2,26 @@
|
|
| 2 |
gradio
|
| 3 |
|
| 4 |
# Download youtube and twitch videos
|
| 5 |
-
pytube
|
| 6 |
-
twitch-dl
|
| 7 |
|
| 8 |
-
# mossformer --> separate speech audios
|
| 9 |
-
torch
|
| 10 |
-
torchvision
|
| 11 |
-
torchaudio
|
| 12 |
-
speechbrain
|
| 13 |
-
soundfile
|
| 14 |
-
modelscope
|
| 15 |
-
rotary-embedding-torch
|
| 16 |
-
transformers
|
| 17 |
|
| 18 |
-
# Transcribe audios
|
| 19 |
-
# git+https://github.com/openai/whisper.git
|
| 20 |
-
git+https://github.com/m-bain/whisperx.git
|
| 21 |
-
pyannote.audio
|
| 22 |
|
| 23 |
-
# Translate
|
| 24 |
-
protobuf
|
| 25 |
|
| 26 |
-
# Add subtitles to videos
|
| 27 |
-
opencv-python
|
|
|
|
| 2 |
gradio
|
| 3 |
|
| 4 |
# Download youtube and twitch videos
|
| 5 |
+
# pytube
|
| 6 |
+
# twitch-dl
|
| 7 |
|
| 8 |
+
# # mossformer --> separate speech audios
|
| 9 |
+
# torch
|
| 10 |
+
# torchvision
|
| 11 |
+
# torchaudio
|
| 12 |
+
# speechbrain
|
| 13 |
+
# soundfile
|
| 14 |
+
# modelscope
|
| 15 |
+
# rotary-embedding-torch
|
| 16 |
+
# transformers
|
| 17 |
|
| 18 |
+
# # Transcribe audios
|
| 19 |
+
# # git+https://github.com/openai/whisper.git
|
| 20 |
+
# git+https://github.com/m-bain/whisperx.git
|
| 21 |
+
# pyannote.audio
|
| 22 |
|
| 23 |
+
# # Translate
|
| 24 |
+
# protobuf
|
| 25 |
|
| 26 |
+
# # Add subtitles to videos
|
| 27 |
+
# opencv-python
|
subtify.py
CHANGED
|
@@ -11,7 +11,8 @@ import re
|
|
| 11 |
|
| 12 |
NUMBER = 100
|
| 13 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 14 |
-
|
|
|
|
| 15 |
SLICE_AUDIO = False
|
| 16 |
SEPARE_VOCALS = False
|
| 17 |
TRANSCRIBE_AUDIO = False
|
|
@@ -19,7 +20,7 @@ CONCATENATE_TRANSCRIPTIONS = False
|
|
| 19 |
TRANSLATE_TRANSCRIPTIONS = False
|
| 20 |
ADD_SUBTITLES_TO_VIDEO = False
|
| 21 |
REMOVE_FILES = False
|
| 22 |
-
REMOVE_ALL =
|
| 23 |
if SEPARE_VOCALS:
|
| 24 |
SECONDS = 150
|
| 25 |
else:
|
|
@@ -440,65 +441,66 @@ def add_translated_subtitles_to_video(original_video_path, original_audio_path,
|
|
| 440 |
def subtify():
|
| 441 |
with gr.Blocks() as demo:
|
| 442 |
# Layout
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
#
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
|
|
|
| 502 |
|
| 503 |
|
| 504 |
demo.launch()
|
|
|
|
| 11 |
|
| 12 |
NUMBER = 100
|
| 13 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 14 |
+
# DEVICE = "cpu"
|
| 15 |
+
DOWNLOAD = True
|
| 16 |
SLICE_AUDIO = False
|
| 17 |
SEPARE_VOCALS = False
|
| 18 |
TRANSCRIBE_AUDIO = False
|
|
|
|
| 20 |
TRANSLATE_TRANSCRIPTIONS = False
|
| 21 |
ADD_SUBTITLES_TO_VIDEO = False
|
| 22 |
REMOVE_FILES = False
|
| 23 |
+
REMOVE_ALL = False
|
| 24 |
if SEPARE_VOCALS:
|
| 25 |
SECONDS = 150
|
| 26 |
else:
|
|
|
|
| 441 |
def subtify():
|
| 442 |
with gr.Blocks() as demo:
|
| 443 |
# Layout
|
| 444 |
+
gr.Markdown("""# Subtify""")
|
| 445 |
+
# with gr.Row(variant="panel"):
|
| 446 |
+
# url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
| 447 |
+
# copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
| 448 |
+
# delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
| 449 |
+
|
| 450 |
+
# stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
| 451 |
+
# visible = False
|
| 452 |
+
# with gr.Row(equal_height=False):
|
| 453 |
+
# image = gr.Image(visible=visible, scale=1)
|
| 454 |
+
# with gr.Column():
|
| 455 |
+
# with gr.Row():
|
| 456 |
+
# source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 457 |
+
# target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 458 |
+
# with gr.Row():
|
| 459 |
+
# subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
| 460 |
+
|
| 461 |
+
# original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
| 462 |
+
# original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
|
| 463 |
+
# original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
|
| 464 |
+
# original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
| 465 |
+
# original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
| 466 |
+
# original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
| 467 |
+
# original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
| 468 |
+
# subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
| 469 |
+
|
| 470 |
+
# # Events
|
| 471 |
+
# # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
| 472 |
+
# delete_button.click(
|
| 473 |
+
# fn=clear_video_url,
|
| 474 |
+
# outputs=[
|
| 475 |
+
# url_textbox,
|
| 476 |
+
# image,
|
| 477 |
+
# source_languaje,
|
| 478 |
+
# target_languaje,
|
| 479 |
+
# subtify_button,
|
| 480 |
+
# original_audio,
|
| 481 |
+
# original_audio_transcribed,
|
| 482 |
+
# original_audio_translated,
|
| 483 |
+
# ]
|
| 484 |
+
# )
|
| 485 |
+
# url_textbox.change(
|
| 486 |
+
# fn=is_valid_url,
|
| 487 |
+
# inputs=url_textbox,
|
| 488 |
+
# outputs=[
|
| 489 |
+
# image,
|
| 490 |
+
# source_languaje,
|
| 491 |
+
# target_languaje,
|
| 492 |
+
# subtify_button,
|
| 493 |
+
# stream_page,
|
| 494 |
+
# original_audio,
|
| 495 |
+
# original_audio_transcribed,
|
| 496 |
+
# original_audio_translated,
|
| 497 |
+
# subtitled_video
|
| 498 |
+
# ]
|
| 499 |
+
# )
|
| 500 |
+
# subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
|
| 501 |
+
# original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
|
| 502 |
+
# original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
|
| 503 |
+
# original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
|
| 504 |
|
| 505 |
|
| 506 |
demo.launch()
|