Refactor project structure and update dependencies
Browse files
- Modularize code by creating separate files for audio, transcription, and UI configuration
- Update requirements.txt to remove unnecessary dependencies
- Simplify audio extraction and transcription processes
- Modify app.py to use new modular structure
- Add new utility files like ui_config.py and audio.py
- Update .gitignore to include __pycache__
- .gitignore +2 -1
- app.py +137 -506
- audio.py +46 -0
- requirements.txt +21 -10
- slice_audio.py +53 -33
- transcribe.py +77 -40
- ui_config.py +57 -0
- url_manager.py +87 -0
.gitignore
CHANGED
|
@@ -14,4 +14,5 @@ sepformer.ipynb
|
|
| 14 |
modelscope.ipynb
|
| 15 |
audio_cache
|
| 16 |
*.png
|
| 17 |
-
.DS_Store
|
|
|
|
|
|
| 14 |
modelscope.ipynb
|
| 15 |
audio_cache
|
| 16 |
*.png
|
| 17 |
+
.DS_Store
|
| 18 |
+
__pycache__
|
app.py
CHANGED
|
@@ -3,14 +3,28 @@ import argparse
|
|
| 3 |
import spaces
|
| 4 |
import os
|
| 5 |
import torch
|
|
|
|
| 6 |
from time import sleep
|
| 7 |
from tqdm import tqdm
|
| 8 |
from lang_list import union_language_dict
|
| 9 |
# import pyperclip
|
| 10 |
-
from pytube import YouTube
|
| 11 |
import re
|
| 12 |
from PIL import Image
|
| 13 |
# import urllib.request
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
NUMBER = 100
|
| 16 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -25,317 +39,33 @@ REMOVE_FILES = True
|
|
| 25 |
if DEVICE == "cpu":
|
| 26 |
# I supose that I am on huggingface server
|
| 27 |
# Get RAM space
|
| 28 |
-
ram = int(os.popen("free -m | grep Mem | awk '{print $2}'").read())
|
|
|
|
| 29 |
factor = 1
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
else:
|
| 33 |
# I supose that I am on my computer
|
| 34 |
# Get VRAM space
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
YOUTUBE = "youtube"
|
| 38 |
TWITCH = "twitch"
|
| 39 |
ERROR = "error"
|
| 40 |
|
| 41 |
-
subtify_logo = Image.open("
|
| 42 |
subtify_logo_width, subtify_logo_height = subtify_logo.size
|
| 43 |
factor = 4
|
| 44 |
new_width = subtify_logo_width // factor
|
| 45 |
new_height = subtify_logo_height // factor
|
| 46 |
|
| 47 |
-
|
| 48 |
-
BUTTON_COLOR = "#47515f"
|
| 49 |
-
SVG_COLOR = "#f3f4f6"
|
| 50 |
-
PANEL_COLOR = "#101827"
|
| 51 |
-
PRIMARY_TEXT_COLOR = "#f3f4f6"
|
| 52 |
-
SUBDUED_TEXT_COLOR = "#59616f"
|
| 53 |
-
BACKGROUND_PRIMARY_COLOR = "#1f2937"
|
| 54 |
-
BACKGROUND_SECONDARY_COLOR = "#101827"
|
| 55 |
-
PRIMARY_BODER_COLOR = "#323c4c"
|
| 56 |
-
BLOCK_TITLE_TEXT_COLOR = "#dfe2e6"
|
| 57 |
-
INPUT_BACKGROUND_COLOR = "#2f3947"
|
| 58 |
-
INPUT_BORDER_COLOR = "#313b4b"
|
| 59 |
-
INPUT_PLACEHOLDER_COLOR = "#616977"
|
| 60 |
-
ERROR_BACKGROUND_COLOR = "#101827"
|
| 61 |
-
ERROR_TEXT_COLOR = "#f7f2f2"
|
| 62 |
-
ERROR_BORDER_COLOR = "#9b3339"
|
| 63 |
-
BUTTON_SECONDARY_BACKGROUND_COLOR = "#434d5c"
|
| 64 |
-
BUTTON_SECONDARY_BORDER_COLOR = "#444d5b"
|
| 65 |
-
BUTTON_SECONDARY_TEXT_COLOR = "#c5c9cc"
|
| 66 |
-
RED = "#ff0000"
|
| 67 |
-
GREEN = "#00ff00"
|
| 68 |
-
BLUE = "#0000ff"
|
| 69 |
-
|
| 70 |
-
html_social_media = f'''
|
| 71 |
-
<div style="float: right;">
|
| 72 |
-
<a href="https://maximofn.com/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 73 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
|
| 74 |
-
<style>
|
| 75 |
-
svg {"{"}
|
| 76 |
-
fill: {SVG_COLOR}
|
| 77 |
-
{"}"}
|
| 78 |
-
</style>
|
| 79 |
-
<path d="M208 80c0-26.5 21.5-48 48-48h64c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48h-8v40H464c30.9 0 56 25.1 56 56v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H464c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-4.4-3.6-8-8-8H312v40h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H256c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V280H112c-4.4 0-8 3.6-8 8v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-30.9 25.1-56 56-56H264V192h-8c-26.5 0-48-21.5-48-48V80z"/>
|
| 80 |
-
</svg>
|
| 81 |
-
</a>
|
| 82 |
-
<a href="http://github.com/maximofn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 83 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 496 512">
|
| 84 |
-
<style>
|
| 85 |
-
svg {"{"}
|
| 86 |
-
fill: {SVG_COLOR}
|
| 87 |
-
{"}"}
|
| 88 |
-
</style>
|
| 89 |
-
<path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/>
|
| 90 |
-
</svg>
|
| 91 |
-
</a>
|
| 92 |
-
<a href="http://linkedin.com/in/MaximoFN/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 93 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
|
| 94 |
-
<style>
|
| 95 |
-
svg {"{"}
|
| 96 |
-
fill: {SVG_COLOR}
|
| 97 |
-
{"}"}
|
| 98 |
-
</style>
|
| 99 |
-
<path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3zM135.4 416H69V202.2h66.5V416zm-33.2-243c-21.3 0-38.5-17.3-38.5-38.5S80.9 96 102.2 96c21.2 0 38.5 17.3 38.5 38.5 0 21.3-17.2 38.5-38.5 38.5zm282.1 243h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9V416z"/>
|
| 100 |
-
</svg>
|
| 101 |
-
</a>
|
| 102 |
-
<a href="http://kaggle.com/maximofn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 103 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 320 512">
|
| 104 |
-
<style>
|
| 105 |
-
svg {"{"}
|
| 106 |
-
fill: {SVG_COLOR}
|
| 107 |
-
{"}"}
|
| 108 |
-
</style>
|
| 109 |
-
<path d="M304.2 501.5L158.4 320.3 298.2 185c2.6-2.7 1.7-10.5-5.3-10.5h-69.2c-3.5 0-7 1.8-10.5 5.3L80.9 313.5V7.5q0-7.5-7.5-7.5H21.5Q14 0 14 7.5v497q0 7.5 7.5 7.5h51.9q7.5 0 7.5-7.5v-109l30.8-29.3 110.5 140.6c3 3.5 6.5 5.3 10.5 5.3h66.9q5.25 0 6-3z"/>
|
| 110 |
-
</svg>
|
| 111 |
-
</a>
|
| 112 |
-
<a href="https://twitter.com/Maximo_fn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 113 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
|
| 114 |
-
<style>
|
| 115 |
-
svg {"{"}
|
| 116 |
-
fill: {SVG_COLOR}
|
| 117 |
-
{"}"}
|
| 118 |
-
</style>
|
| 119 |
-
<path d="M389.2 48h70.6L305.6 224.2 487 464H345L233.7 318.6 106.5 464H35.8L200.7 275.5 26.8 48H172.4L272.9 180.9 389.2 48zM364.4 421.8h39.1L151.1 88h-42L364.4 421.8z"/>
|
| 120 |
-
</svg>
|
| 121 |
-
</a>
|
| 122 |
-
<a href="https://www.instagram.com/maximo__fn/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 123 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
|
| 124 |
-
<style>
|
| 125 |
-
svg {"{"}
|
| 126 |
-
fill: {SVG_COLOR}
|
| 127 |
-
{"}"}
|
| 128 |
-
</style>
|
| 129 |
-
<path d="M224.1 141c-63.6 0-114.9 51.3-114.9 114.9s51.3 114.9 114.9 114.9S339 319.5 339 255.9 287.7 141 224.1 141zm0 189.6c-41.1 0-74.7-33.5-74.7-74.7s33.5-74.7 74.7-74.7 74.7 33.5 74.7 74.7-33.6 74.7-74.7 74.7zm146.4-194.3c0 14.9-12 26.8-26.8 26.8-14.9 0-26.8-12-26.8-26.8s12-26.8 26.8-26.8 26.8 12 26.8 26.8zm76.1 27.2c-1.7-35.9-9.9-67.7-36.2-93.9-26.2-26.2-58-34.4-93.9-36.2-37-2.1-147.9-2.1-184.9 0-35.8 1.7-67.6 9.9-93.9 36.1s-34.4 58-36.2 93.9c-2.1 37-2.1 147.9 0 184.9 1.7 35.9 9.9 67.7 36.2 93.9s58 34.4 93.9 36.2c37 2.1 147.9 2.1 184.9 0 35.9-1.7 67.7-9.9 93.9-36.2 26.2-26.2 34.4-58 36.2-93.9 2.1-37 2.1-147.8 0-184.8zM398.8 388c-7.8 19.6-22.9 34.7-42.6 42.6-29.5 11.7-99.5 9-132.1 9s-102.7 2.6-132.1-9c-19.6-7.8-34.7-22.9-42.6-42.6-11.7-29.5-9-99.5-9-132.1s-2.6-102.7 9-132.1c7.8-19.6 22.9-34.7 42.6-42.6 29.5-11.7 99.5-9 132.1-9s102.7-2.6 132.1 9c19.6 7.8 34.7 22.9 42.6 42.6 11.7 29.5 9 99.5 9 132.1s2.7 102.7-9 132.1z"/>
|
| 130 |
-
</svg>
|
| 131 |
-
</a>
|
| 132 |
-
<a href="https://www.youtube.com/channel/UCdQwg2JU_fWRsHn3yIlf3tw" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 133 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
|
| 134 |
-
<style>
|
| 135 |
-
svg {"{"}
|
| 136 |
-
fill: {SVG_COLOR}
|
| 137 |
-
{"}"}
|
| 138 |
-
</style>
|
| 139 |
-
<path d="M549.655 124.083c-6.281-23.65-24.787-42.276-48.284-48.597C458.781 64 288 64 288 64S117.22 64 74.629 75.486c-23.497 6.322-42.003 24.947-48.284 48.597-11.412 42.867-11.412 132.305-11.412 132.305s0 89.438 11.412 132.305c6.281 23.65 24.787 41.5 48.284 47.821C117.22 448 288 448 288 448s170.78 0 213.371-11.486c23.497-6.321 42.003-24.171 48.284-47.821 11.412-42.867 11.412-132.305 11.412-132.305s0-89.438-11.412-132.305zm-317.51 213.508V175.185l142.739 81.205-142.739 81.201z"/>
|
| 140 |
-
</svg>
|
| 141 |
-
</a>
|
| 142 |
-
<a href="https://www.facebook.com/profile.php?id=100085177670661" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 143 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
|
| 144 |
-
<style>
|
| 145 |
-
svg {"{"}
|
| 146 |
-
fill: {SVG_COLOR}
|
| 147 |
-
{"}"}
|
| 148 |
-
</style>
|
| 149 |
-
<path d="M504 256C504 119 393 8 256 8S8 119 8 256c0 123.78 90.69 226.38 209.25 245V327.69h-63V256h63v-54.64c0-62.15 37-96.48 93.67-96.48 27.14 0 55.52 4.84 55.52 4.84v61h-31.28c-30.8 0-40.41 19.12-40.41 38.73V256h68.78l-11 71.69h-57.78V501C413.31 482.38 504 379.78 504 256z"/>
|
| 150 |
-
</svg>
|
| 151 |
-
</a>
|
| 152 |
-
<a href="https://www.tiktok.com/@maximo__fn" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 153 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 448 512">
|
| 154 |
-
<style>
|
| 155 |
-
svg {"{"}
|
| 156 |
-
fill: {SVG_COLOR}
|
| 157 |
-
{"}"}
|
| 158 |
-
</style>
|
| 159 |
-
<path d="M448,209.91a210.06,210.06,0,0,1-122.77-39.25V349.38A162.55,162.55,0,1,1,185,188.31V278.2a74.62,74.62,0,1,0,52.23,71.18V0l88,0a121.18,121.18,0,0,0,1.86,22.17h0A122.18,122.18,0,0,0,381,102.39a121.43,121.43,0,0,0,67,20.14Z"/>
|
| 160 |
-
</svg>
|
| 161 |
-
</a>
|
| 162 |
-
<a href="https://www.twitch.tv/maximofn/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
|
| 163 |
-
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 512 512">
|
| 164 |
-
<style>
|
| 165 |
-
svg {"{"}
|
| 166 |
-
fill: {SVG_COLOR}
|
| 167 |
-
{"}"}
|
| 168 |
-
</style>
|
| 169 |
-
<path d="M391.17,103.47H352.54v109.7h38.63ZM285,103H246.37V212.75H285ZM120.83,0,24.31,91.42V420.58H140.14V512l96.53-91.42h77.25L487.69,256V0ZM449.07,237.75l-77.22,73.12H294.61l-67.6,64v-64H140.14V36.58H449.07Z"/>
|
| 170 |
-
</svg>
|
| 171 |
-
</a>
|
| 172 |
-
</div>
|
| 173 |
-
'''
|
| 174 |
-
|
| 175 |
-
html_subtify_logo = f"""
|
| 176 |
-
<div style="display: flex; justify-content: center; align-items: center;">
|
| 177 |
-
<img src='https://pub-fb664c455eca46a2ba762a065ac900f7.r2.dev/subtify_logo-scaled.webp' width={new_width}px height={new_height}px >
|
| 178 |
-
</div>
|
| 179 |
-
"""
|
| 180 |
-
|
| 181 |
-
html_buy_me_a_coffe = '''
|
| 182 |
-
<div style="float: right;">
|
| 183 |
-
<a href="https://www.buymeacoffee.com/maximofn" target="_blank">
|
| 184 |
-
<img src="https://img.shields.io/badge/Buy_Me_A_Coffee-support_my_work-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=white&labelColor=101010" alt="buy me a coffe">
|
| 185 |
-
</a>
|
| 186 |
-
</div>
|
| 187 |
-
'''
|
| 188 |
|
| 189 |
language_dict = union_language_dict()
|
| 190 |
|
| 191 |
-
# def subtify_no_ui():
|
| 192 |
-
# number_works = 6
|
| 193 |
-
# progress_bar = tqdm(total=number_works, desc="Subtify")
|
| 194 |
-
# folder_chunck = "chunks"
|
| 195 |
-
# folder_concatenated = "concatenated_transcriptions"
|
| 196 |
-
# folder_translated_transcriptions = "translated_transcriptions"
|
| 197 |
-
# if not os.path.exists(folder_chunck):
|
| 198 |
-
# os.makedirs(folder_chunck)
|
| 199 |
-
# if not os.path.exists(folder_concatenated):
|
| 200 |
-
# os.makedirs(folder_concatenated)
|
| 201 |
-
# if not os.path.exists(folder_translated_transcriptions):
|
| 202 |
-
# os.makedirs(folder_translated_transcriptions)
|
| 203 |
-
|
| 204 |
-
# ################## Download video and audio ##################
|
| 205 |
-
# if DOWNLOAD:
|
| 206 |
-
# print('*'*NUMBER)
|
| 207 |
-
# # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
|
| 208 |
-
# # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
|
| 209 |
-
# # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
|
| 210 |
-
# # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
|
| 211 |
-
# # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
|
| 212 |
-
# # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
|
| 213 |
-
# url = "https://www.youtube.com/watch?v=h9xPrgTYP_0" # Letitia 40 segundos
|
| 214 |
-
# print(f"Downloading video and audio from {url}")
|
| 215 |
-
# python_file = "download.py"
|
| 216 |
-
# command = f"python {python_file} {url}"
|
| 217 |
-
# os.system(command)
|
| 218 |
-
# sleep(1)
|
| 219 |
-
# print('*'*NUMBER)
|
| 220 |
-
# print("\n\n")
|
| 221 |
-
# progress_bar.update(1)
|
| 222 |
-
|
| 223 |
-
# ################## Slice audio ##################
|
| 224 |
-
# if SLICE_AUDIO:
|
| 225 |
-
# print('*'*NUMBER)
|
| 226 |
-
# print("Slicing audio")
|
| 227 |
-
# python_file = "slice_audio.py"
|
| 228 |
-
# audio = "audios/download_audio.mp3"
|
| 229 |
-
# command = f"python {python_file} {audio} {SECONDS}"
|
| 230 |
-
# os.system(command)
|
| 231 |
-
# print('*'*NUMBER)
|
| 232 |
-
# print("\n\n")
|
| 233 |
-
# progress_bar.update(1)
|
| 234 |
-
|
| 235 |
-
# ################# Transcript slices ##################
|
| 236 |
-
# if TRANSCRIBE_AUDIO:
|
| 237 |
-
# print('*'*NUMBER)
|
| 238 |
-
# print("Transcript slices")
|
| 239 |
-
# chunks_folder = "chunks"
|
| 240 |
-
# if not os.path.exists(chunks_folder):
|
| 241 |
-
# os.makedirs(chunks_folder)
|
| 242 |
-
# python_file = "transcribe.py"
|
| 243 |
-
# chunks_file = "chunks/output_files.txt"
|
| 244 |
-
# number_of_speakers = 10
|
| 245 |
-
# source_languaje = "English"
|
| 246 |
-
# command = f"python {python_file} {chunks_file} {source_languaje} {number_of_speakers} {DEVICE}"
|
| 247 |
-
# os.system(command)
|
| 248 |
-
# if REMOVE_FILES:
|
| 249 |
-
# with open(chunks_file, 'r') as f:
|
| 250 |
-
# files = f.read().splitlines()
|
| 251 |
-
# for file in files:
|
| 252 |
-
# audios_extension = "mp3"
|
| 253 |
-
# file_name, _ = file.split(".")
|
| 254 |
-
# _, file_name = file_name.split("/")
|
| 255 |
-
# vocal = f'{chunks_folder}/{file_name}.{audios_extension}'
|
| 256 |
-
# command = f"rm {vocal}"
|
| 257 |
-
# os.system(command)
|
| 258 |
-
# print('*'*NUMBER)
|
| 259 |
-
# print("\n\n")
|
| 260 |
-
# progress_bar.update(1)
|
| 261 |
-
|
| 262 |
-
# ################## Concatenate transcriptions ##################
|
| 263 |
-
# if CONCATENATE_TRANSCRIPTIONS:
|
| 264 |
-
# print('*'*NUMBER)
|
| 265 |
-
# print("Concatenate transcriptions")
|
| 266 |
-
# folder_concatenated = "concatenated_transcriptions"
|
| 267 |
-
# if not os.path.exists(folder_concatenated):
|
| 268 |
-
# os.makedirs(folder_concatenated)
|
| 269 |
-
|
| 270 |
-
# chunck_file = "chunks/output_files.txt"
|
| 271 |
-
# python_file = "concat_transcriptions.py"
|
| 272 |
-
# command = f"python {python_file} {chunck_file} {SECONDS}"
|
| 273 |
-
# os.system(command)
|
| 274 |
-
# if REMOVE_FILES:
|
| 275 |
-
# with open(chunck_file, 'r') as f:
|
| 276 |
-
# files = f.read().splitlines()
|
| 277 |
-
# for file in files:
|
| 278 |
-
# file_name, _ = file.split(".")
|
| 279 |
-
# _, file_name = file_name.split("/")
|
| 280 |
-
# transcriptions_folder = "transcriptions"
|
| 281 |
-
# transcription_extension = "srt"
|
| 282 |
-
# command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
|
| 283 |
-
# os.system(command)
|
| 284 |
-
# print('*'*NUMBER)
|
| 285 |
-
# print("\n\n")
|
| 286 |
-
# progress_bar.update(1)
|
| 287 |
-
|
| 288 |
-
# ################## Translate transcription ##################
|
| 289 |
-
# target_languaje = "Español"
|
| 290 |
-
# if TRANSLATE_TRANSCRIPTIONS:
|
| 291 |
-
# print('*'*NUMBER)
|
| 292 |
-
# print("Translate transcription")
|
| 293 |
-
# transcription_file = "concatenated_transcriptions/download_audio.srt"
|
| 294 |
-
# source_languaje = "English"
|
| 295 |
-
# python_file = "translate_transcriptions.py"
|
| 296 |
-
# command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
|
| 297 |
-
# os.system(command)
|
| 298 |
-
# if REMOVE_FILES:
|
| 299 |
-
# if os.path.exists(transcription_file):
|
| 300 |
-
# command = f"rm {transcription_file}"
|
| 301 |
-
# os.system(command)
|
| 302 |
-
# print('*'*NUMBER)
|
| 303 |
-
# print("\n\n")
|
| 304 |
-
# progress_bar.update(1)
|
| 305 |
-
|
| 306 |
-
# ################## Add subtitles to video ##################
|
| 307 |
-
# if ADD_SUBTITLES_TO_VIDEO:
|
| 308 |
-
# print('*'*NUMBER)
|
| 309 |
-
# print("Add subtitles to video")
|
| 310 |
-
# python_file = "add_subtitles_to_video.py"
|
| 311 |
-
# transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
|
| 312 |
-
# input_video_file = "videos/download_video.mp4"
|
| 313 |
-
# input_audio_file = "audios/download_audio.mp3"
|
| 314 |
-
# command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
|
| 315 |
-
# os.system(command)
|
| 316 |
-
# if REMOVE_FILES:
|
| 317 |
-
# if os.path.exists(input_video_file):
|
| 318 |
-
# command = f"rm {input_video_file}"
|
| 319 |
-
# os.system(command)
|
| 320 |
-
# if os.path.exists(input_audio_file):
|
| 321 |
-
# command = f"rm {input_audio_file}"
|
| 322 |
-
# os.system(command)
|
| 323 |
-
# if os.path.exists(transcription_file):
|
| 324 |
-
# command = f"rm {transcription_file}"
|
| 325 |
-
# os.system(command)
|
| 326 |
-
# if os.path.exists("chunks/output_files.txt"):
|
| 327 |
-
# command = f"rm chunks/output_files.txt"
|
| 328 |
-
# os.system(command)
|
| 329 |
-
# if os.path.exists("chunks"):
|
| 330 |
-
# command = f"rm -r chunks"
|
| 331 |
-
# os.system(command)
|
| 332 |
-
# if os.path.exists("vocals/speakers.txt"):
|
| 333 |
-
# command = f"rm vocals/speakers.txt"
|
| 334 |
-
# os.system(command)
|
| 335 |
-
# print('*'*NUMBER)
|
| 336 |
-
# print("\n\n")
|
| 337 |
-
# progress_bar.update(1)
|
| 338 |
-
|
| 339 |
def remove_all_files():
|
| 340 |
if os.path.exists("audios"):
|
| 341 |
command = f"rm -r audios"
|
|
@@ -359,13 +89,10 @@ def remove_all_files():
|
|
| 359 |
command = f"rm -r vocals"
|
| 360 |
os.system(command)
|
| 361 |
|
| 362 |
-
# def paste_url_from_clipboard():
|
| 363 |
-
# return pyperclip.paste()
|
| 364 |
-
|
| 365 |
def reset_frontend():
|
| 366 |
visible = False
|
| 367 |
return (
|
| 368 |
-
|
| 369 |
gr.Image(visible=visible),
|
| 370 |
gr.Dropdown(visible=visible),
|
| 371 |
gr.Dropdown(visible=visible),
|
|
@@ -381,142 +108,47 @@ def reset_frontend():
|
|
| 381 |
gr.Textbox(visible=visible),
|
| 382 |
gr.Textbox(visible=visible),
|
| 383 |
gr.Textbox(visible=visible),
|
| 384 |
-
gr.Textbox(visible=visible),
|
| 385 |
-
gr.Textbox(visible=visible),
|
| 386 |
gr.Video(visible=visible),
|
| 387 |
)
|
| 388 |
|
| 389 |
def show_auxiliar_block1():
|
| 390 |
return gr.Textbox(value="URL checked", visible=False)
|
| 391 |
|
| 392 |
-
def get_youtube_thumbnail(url):
|
| 393 |
-
yt = YouTube(url)
|
| 394 |
-
thumbnail_url = yt.thumbnail_url
|
| 395 |
-
return thumbnail_url
|
| 396 |
-
|
| 397 |
-
def is_valid_youtube_url(url):
|
| 398 |
-
# This regular expression should match the following YouTube URL formats:
|
| 399 |
-
# - https://youtube.com/watch?v=video_id
|
| 400 |
-
# - https://www.youtube.com/watch?v=video_id
|
| 401 |
-
# - https://youtu.be/video_id
|
| 402 |
-
patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
|
| 403 |
-
return bool(re.match(patron_youtube, url))
|
| 404 |
-
|
| 405 |
-
def is_valid_twitch_url(url):
|
| 406 |
-
# This regular expression should match the following Twitch URL formats:
|
| 407 |
-
# - https://twitch.tv/channel_name
|
| 408 |
-
# - https://www.twitch.tv/channel_name
|
| 409 |
-
# - https://twitch.tv/videos/video_id
|
| 410 |
-
twitch_pattern = r'(https?://)?(www\.)?twitch\.tv/(videos/\d+|\w+)'
|
| 411 |
-
return bool(re.match(twitch_pattern, url))
|
| 412 |
-
|
| 413 |
-
def is_valid_url(url):
|
| 414 |
-
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
| 415 |
-
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 416 |
-
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 417 |
-
advanced_setings = gr.Accordion(visible=True)
|
| 418 |
-
number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
| 419 |
-
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
|
| 420 |
-
|
| 421 |
-
# Youtube
|
| 422 |
-
if "youtube" in url.lower() or "youtu.be" in url.lower():
|
| 423 |
-
if is_valid_youtube_url(url):
|
| 424 |
-
thumbnail = get_youtube_thumbnail(url)
|
| 425 |
-
if thumbnail:
|
| 426 |
-
return (
|
| 427 |
-
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
| 428 |
-
source_languaje,
|
| 429 |
-
target_languaje,
|
| 430 |
-
advanced_setings,
|
| 431 |
-
number_of_speakers,
|
| 432 |
-
subtify_button,
|
| 433 |
-
)
|
| 434 |
-
else:
|
| 435 |
-
return (
|
| 436 |
-
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
| 437 |
-
source_languaje,
|
| 438 |
-
target_languaje,
|
| 439 |
-
advanced_setings,
|
| 440 |
-
number_of_speakers,
|
| 441 |
-
subtify_button,
|
| 442 |
-
)
|
| 443 |
-
|
| 444 |
-
# Twitch
|
| 445 |
-
elif "twitch" in url.lower() or "twitch.tv" in url.lower():
|
| 446 |
-
if is_valid_twitch_url(url):
|
| 447 |
-
return (
|
| 448 |
-
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
| 449 |
-
source_languaje,
|
| 450 |
-
target_languaje,
|
| 451 |
-
advanced_setings,
|
| 452 |
-
number_of_speakers,
|
| 453 |
-
subtify_button,
|
| 454 |
-
)
|
| 455 |
-
|
| 456 |
-
# Error
|
| 457 |
-
visible = False
|
| 458 |
-
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
| 459 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 460 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 461 |
-
advanced_setings = gr.Accordion(visible=visible)
|
| 462 |
-
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
| 463 |
-
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
| 464 |
-
return (
|
| 465 |
-
image,
|
| 466 |
-
source_languaje,
|
| 467 |
-
target_languaje,
|
| 468 |
-
advanced_setings,
|
| 469 |
-
number_of_speakers,
|
| 470 |
-
subtify_button,
|
| 471 |
-
)
|
| 472 |
-
|
| 473 |
def change_visibility_texboxes():
|
| 474 |
-
|
| 475 |
return (
|
| 476 |
-
gr.
|
| 477 |
-
gr.
|
| 478 |
-
gr.
|
| 479 |
-
gr.
|
| 480 |
-
gr.
|
| 481 |
-
gr.
|
| 482 |
-
gr.
|
| 483 |
-
gr.Textbox(visible=False),
|
| 484 |
)
|
| 485 |
|
| 486 |
-
def
|
| 487 |
print('*'*NUMBER)
|
| 488 |
-
print(f"
|
| 489 |
-
|
| 490 |
-
audios_folder = "audios"
|
| 491 |
-
videos_folder = "videos"
|
| 492 |
-
if not os.path.exists(audios_folder):
|
| 493 |
-
os.makedirs(audios_folder)
|
| 494 |
-
if not os.path.exists(videos_folder):
|
| 495 |
-
os.makedirs(videos_folder)
|
| 496 |
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
gr.Textbox(value=audio),
|
| 513 |
-
gr.Textbox(value=video),
|
| 514 |
-
)
|
| 515 |
-
|
| 516 |
-
def slice_audio(audio_path):
|
| 517 |
print('*'*NUMBER)
|
| 518 |
-
print("Slicing audio")
|
| 519 |
|
|
|
|
|
|
|
| 520 |
folder_vocals = "vocals"
|
| 521 |
folder_chunck = "chunks"
|
| 522 |
if not os.path.exists(folder_vocals):
|
|
@@ -524,34 +156,22 @@ def slice_audio(audio_path):
|
|
| 524 |
if not os.path.exists(folder_chunck):
|
| 525 |
os.makedirs(folder_chunck)
|
| 526 |
|
| 527 |
-
|
| 528 |
-
command = f"python {python_file} {audio_path} {SECONDS}"
|
| 529 |
-
os.system(command)
|
| 530 |
|
| 531 |
return (
|
| 532 |
-
gr.
|
| 533 |
)
|
| 534 |
|
| 535 |
-
def trascribe_audio(
|
| 536 |
print('*'*NUMBER)
|
| 537 |
-
print("Transcript
|
| 538 |
-
|
| 539 |
-
folder_chunks = "chunks"
|
| 540 |
-
python_file = "transcribe.py"
|
| 541 |
-
chunks_file = "chunks/output_files.txt"
|
| 542 |
-
command = f"python {python_file} {chunks_file} {source_languaje} {number_of_speakers} {DEVICE}"
|
| 543 |
-
os.system(command)
|
| 544 |
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
for file in files:
|
| 548 |
-
audios_extension = "mp3"
|
| 549 |
-
file_name, _ = file.split(".")
|
| 550 |
-
_, file_name = file_name.split("/")
|
| 551 |
-
vocal = f'{folder_chunks}/{file_name}.{audios_extension}'
|
| 552 |
-
command = f"rm {vocal}"
|
| 553 |
-
os.system(command)
|
| 554 |
|
|
|
|
|
|
|
|
|
|
| 555 |
return (
|
| 556 |
gr.Textbox(value="Ok")
|
| 557 |
)
|
|
@@ -566,7 +186,7 @@ def concatenate_transcriptions():
|
|
| 566 |
|
| 567 |
chunck_file = "chunks/output_files.txt"
|
| 568 |
python_file = "concat_transcriptions.py"
|
| 569 |
-
command = f"python {python_file} {chunck_file} {
|
| 570 |
os.system(command)
|
| 571 |
|
| 572 |
with open(chunck_file, 'r') as f:
|
|
@@ -651,6 +271,23 @@ def hide_textbobes_progress_info():
|
|
| 651 |
gr.Textbox(value="Waiting", visible=visible),
|
| 652 |
)
|
| 653 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
@spaces.GPU
|
| 655 |
def subtify():
|
| 656 |
with gr.Blocks(
|
|
@@ -700,54 +337,59 @@ def subtify():
|
|
| 700 |
gr.HTML(html_social_media)
|
| 701 |
gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
|
| 702 |
gr.HTML(html_subtify_logo)
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
# paste_button = gr.Button(size="sm", icon="icons/paste.svg", value="paste", min_width="10px", scale=0)
|
| 706 |
-
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)
|
| 707 |
-
|
| 708 |
visible = False
|
| 709 |
-
|
| 710 |
-
with
|
| 711 |
-
|
| 712 |
-
with
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
with gr.Row():
|
| 714 |
-
source_languaje = gr.Dropdown(visible=
|
| 715 |
-
target_languaje = gr.Dropdown(visible=
|
| 716 |
-
with gr.Accordion("Advanced settings", open=False, visible=
|
| 717 |
-
number_of_speakers = gr.Dropdown(visible=
|
| 718 |
-
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=
|
| 719 |
|
| 720 |
-
|
| 721 |
with gr.Row():
|
| 722 |
-
|
| 723 |
-
video_sliced_progress_info = gr.Textbox(placeholder="Waiting", label="Video slice progress info", elem_id="video_sliced_progress_info", interactive=False, visible=visible)
|
| 724 |
video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
|
| 725 |
transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
|
| 726 |
video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
|
| 727 |
video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
|
| 728 |
|
| 729 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
|
| 730 |
-
original_video_path = gr.Textbox(label="Original video path",
|
| 731 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
|
| 732 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
|
| 733 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
|
| 734 |
auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)
|
| 735 |
|
|
|
|
|
|
|
| 736 |
# Events
|
| 737 |
# paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
|
| 738 |
delete_button.click(
|
| 739 |
fn=reset_frontend,
|
| 740 |
outputs=[
|
| 741 |
-
|
| 742 |
-
image,
|
| 743 |
source_languaje,
|
| 744 |
target_languaje,
|
| 745 |
Advanced_setings,
|
| 746 |
number_of_speakers,
|
| 747 |
subtify_button,
|
| 748 |
-
|
| 749 |
-
video_donwloaded_progress_info,
|
| 750 |
-
video_sliced_progress_info,
|
| 751 |
video_transcribed_progress_info,
|
| 752 |
transcriptions_concatenated_progress_info,
|
| 753 |
video_translated_progress_info,
|
|
@@ -755,54 +397,43 @@ def subtify():
|
|
| 755 |
subtitled_video,
|
| 756 |
]
|
| 757 |
)
|
| 758 |
-
|
| 759 |
-
fn=
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
auxiliar_block1.change(
|
| 763 |
-
fn=is_valid_url,
|
| 764 |
-
inputs=url_textbox,
|
| 765 |
-
outputs=[image, source_languaje, target_languaje, Advanced_setings, number_of_speakers, subtify_button]
|
| 766 |
)
|
| 767 |
subtify_button.click(
|
| 768 |
fn=change_visibility_texboxes,
|
| 769 |
-
outputs=[
|
| 770 |
-
)
|
| 771 |
-
auxiliar_block2.change(
|
| 772 |
-
fn=get_audio_and_video_from_video,
|
| 773 |
-
inputs=[url_textbox],
|
| 774 |
-
outputs=[video_donwloaded_progress_info, original_audio_path, original_video_path]
|
| 775 |
)
|
| 776 |
-
|
| 777 |
-
fn=
|
| 778 |
-
inputs=[
|
| 779 |
-
outputs=[
|
| 780 |
)
|
| 781 |
-
|
| 782 |
fn=trascribe_audio,
|
| 783 |
-
inputs=[
|
| 784 |
outputs=[video_transcribed_progress_info]
|
| 785 |
)
|
| 786 |
-
video_transcribed_progress_info.change(
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
)
|
| 790 |
-
transcriptions_concatenated_progress_info.change(
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
)
|
| 795 |
-
video_translated_progress_info.change(
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
)
|
| 800 |
-
auxiliar_block3.change(
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
)
|
| 804 |
-
|
| 805 |
-
gr.HTML(html_buy_me_a_coffe)
|
| 806 |
|
| 807 |
demo.launch()
|
| 808 |
|
|
|
|
| 3 |
import spaces
|
| 4 |
import os
|
| 5 |
import torch
|
| 6 |
+
import shutil
|
| 7 |
from time import sleep
|
| 8 |
from tqdm import tqdm
|
| 9 |
from lang_list import union_language_dict
|
| 10 |
# import pyperclip
|
|
|
|
| 11 |
import re
|
| 12 |
from PIL import Image
|
| 13 |
# import urllib.request
|
| 14 |
+
from ui_config import (
|
| 15 |
+
BACKGROUND_COLOR, BUTTON_COLOR, SVG_COLOR, PANEL_COLOR,
|
| 16 |
+
PRIMARY_TEXT_COLOR, SUBDUED_TEXT_COLOR, BACKGROUND_PRIMARY_COLOR,
|
| 17 |
+
BACKGROUND_SECONDARY_COLOR, PRIMARY_BODER_COLOR, BLOCK_TITLE_TEXT_COLOR,
|
| 18 |
+
INPUT_BACKGROUND_COLOR, INPUT_BORDER_COLOR, INPUT_PLACEHOLDER_COLOR,
|
| 19 |
+
ERROR_BACKGROUND_COLOR, ERROR_TEXT_COLOR, ERROR_BORDER_COLOR,
|
| 20 |
+
BUTTON_SECONDARY_BACKGROUND_COLOR, BUTTON_SECONDARY_BORDER_COLOR,
|
| 21 |
+
BUTTON_SECONDARY_TEXT_COLOR, RED, GREEN, BLUE,
|
| 22 |
+
html_social_media, get_html_subtify_logo, html_buy_me_a_coffe
|
| 23 |
+
)
|
| 24 |
+
# from url_manager import get_youtube_thumbnail, is_valid_youtube_url, is_valid_twitch_url, is_valid_url
|
| 25 |
+
from slice_audio import slice_audio as slice_audio_main
|
| 26 |
+
from audio import get_audio_from_video
|
| 27 |
+
from transcribe import transcribe, get_language_dict
|
| 28 |
|
| 29 |
NUMBER = 100
|
| 30 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 39 |
if DEVICE == "cpu":
|
| 40 |
# I suppose that I am on the Hugging Face server
|
| 41 |
# Get RAM space
|
| 42 |
+
# ram = int(os.popen("free -m | grep Mem | awk '{print $2}'").read())
|
| 43 |
+
ram = 16000
|
| 44 |
factor = 1
|
| 45 |
+
CHUNK_SECONDS = int(ram*factor)
|
| 46 |
+
CHUNK_SECONDS = 30
|
| 47 |
+
CHUNK_OVERLAP_SECONDS = 5
|
| 48 |
+
print(f"RAM: {ram}, CHUNK_SECONDS: {CHUNK_SECONDS}, CHUNK_OVERLAP_SECONDS: {CHUNK_OVERLAP_SECONDS}")
|
| 49 |
else:
|
| 50 |
# I suppose that I am on my computer
|
| 51 |
# Get VRAM space
|
| 52 |
+
CHUNK_SECONDS = 30
|
| 53 |
+
CHUNK_OVERLAP_SECONDS = 5
|
| 54 |
|
| 55 |
YOUTUBE = "youtube"
|
| 56 |
TWITCH = "twitch"
|
| 57 |
ERROR = "error"
|
| 58 |
|
| 59 |
+
subtify_logo = Image.open("assets/subtify_logo-scaled.png")
|
| 60 |
subtify_logo_width, subtify_logo_height = subtify_logo.size
|
| 61 |
factor = 4
|
| 62 |
new_width = subtify_logo_width // factor
|
| 63 |
new_height = subtify_logo_height // factor
|
| 64 |
|
| 65 |
+
html_subtify_logo = get_html_subtify_logo(new_width, new_height)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
language_dict = union_language_dict()
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def remove_all_files():
|
| 70 |
if os.path.exists("audios"):
|
| 71 |
command = f"rm -r audios"
|
|
|
|
| 89 |
command = f"rm -r vocals"
|
| 90 |
os.system(command)
|
| 91 |
|
|
|
|
|
|
|
|
|
|
| 92 |
def reset_frontend():
|
| 93 |
visible = False
|
| 94 |
return (
|
| 95 |
+
None,
|
| 96 |
gr.Image(visible=visible),
|
| 97 |
gr.Dropdown(visible=visible),
|
| 98 |
gr.Dropdown(visible=visible),
|
|
|
|
| 108 |
gr.Textbox(visible=visible),
|
| 109 |
gr.Textbox(visible=visible),
|
| 110 |
gr.Textbox(visible=visible),
|
|
|
|
|
|
|
| 111 |
gr.Video(visible=visible),
|
| 112 |
)
|
| 113 |
|
| 114 |
def show_auxiliar_block1():
    """Return a hidden textbox whose value is set to "URL checked"."""
    checked_marker = gr.Textbox(value="URL checked", visible=False)
    return checked_marker
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
def change_visibility_texboxes():
    """
    Build the updates applied when the subtify button is clicked.

    The first update sets auxiliar_block1 to "Done"; the remaining ones make
    the progress textboxes visible.

    Returns:
        tuple: Six updates, one per component listed as an output of the
            subtify button click event, in that same order. The former
            seventh entry (video_sliced_progress_info) was removed because
            that component is not among the click outputs.
    """
    return (
        gr.update(value="Done"),    # auxiliar_block1
        gr.update(visible=True),    # get_audio_from_video_info
        gr.update(visible=True),    # video_transcribed_progress_info
        gr.update(visible=True),    # transcriptions_concatenated_progress_info
        gr.update(visible=True),    # video_translated_progress_info
        gr.update(visible=True),    # video_subtitled_progress_info
    )
|
| 127 |
|
| 128 |
+
def get_audio(video_path):
    """
    Extract the audio track of a video and report the outcome to the UI.

    Args:
        video_path (str): Path to the video file

    Returns:
        list: Two updates, for get_audio_from_video_info and
            original_audio_path. On success they carry "Ok" and the audio
            path; on any error they carry "Error" and an empty string.
    """
    print('*'*NUMBER)
    print(f"Getting audio from video {video_path}")

    try:
        extracted_audio = get_audio_from_video(video_path, "audios")
    except Exception as error:
        # Failures are logged and surfaced through the status textbox
        print(f"Error: {str(error)}")
        status, audio_value = "Error", ""
    else:
        status, audio_value = "Ok", extracted_audio

    return [
        gr.update(value=status),       # get_audio_from_video_info
        gr.update(value=audio_value),  # original_audio_path
    ]
|
| 145 |
+
|
| 146 |
+
def slice_audio(input_audio_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
print('*'*NUMBER)
|
| 148 |
+
print(f"Slicing audio {input_audio_path} in chunks of {CHUNK_SECONDS} seconds with {CHUNK_OVERLAP_SECONDS} seconds overlap")
|
| 149 |
|
| 150 |
+
# Create vocals and chunks folders
|
| 151 |
+
print("Creating vocals and chunks folders")
|
| 152 |
folder_vocals = "vocals"
|
| 153 |
folder_chunck = "chunks"
|
| 154 |
if not os.path.exists(folder_vocals):
|
|
|
|
| 156 |
if not os.path.exists(folder_chunck):
|
| 157 |
os.makedirs(folder_chunck)
|
| 158 |
|
| 159 |
+
slice_audio_main(input_audio_path, folder_chunck, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)
|
|
|
|
|
|
|
| 160 |
|
| 161 |
return (
|
| 162 |
+
gr.update(value="Ok"), # video_sliced_progress_info
|
| 163 |
)
|
| 164 |
|
| 165 |
+
def trascribe_audio(input_audio_path, source_languaje):
    """
    Transcribe an audio file in the selected source language.

    Args:
        input_audio_path (str): Path to the audio file
        source_languaje (str): Language name, used as key of the dictionary
            returned by get_language_dict()

    Returns:
        gr.Textbox: Textbox with value "Ok"
    """
    banner = '*'*NUMBER
    print(banner)
    print(f"Transcript {input_audio_path}")

    # Look up the transcriber code for the selected language name
    transcriber_language = get_language_dict()[source_languaje]["transcriber"]

    # Transcribe audio file
    transcribe(input_audio_path, transcriber_language, DEVICE, CHUNK_SECONDS, CHUNK_OVERLAP_SECONDS)

    return gr.Textbox(value="Ok")
|
|
|
|
| 186 |
|
| 187 |
chunck_file = "chunks/output_files.txt"
|
| 188 |
python_file = "concat_transcriptions.py"
|
| 189 |
+
command = f"python {python_file} {chunck_file} {CHUNK_SECONDS} {CHUNK_OVERLAP_SECONDS}"
|
| 190 |
os.system(command)
|
| 191 |
|
| 192 |
with open(chunck_file, 'r') as f:
|
|
|
|
| 271 |
gr.Textbox(value="Waiting", visible=visible),
|
| 272 |
)
|
| 273 |
|
| 274 |
+
def process_uploaded_video(video_path):
    """
    Copy an uploaded video into the "videos" folder and show the config block.

    Args:
        video_path (str | None): Path of the uploaded video. It is empty when
            the video input has been cleared.

    Returns:
        list: Three updates, for video_input, config_block and
            original_video_path, in that order.
    """
    # The clear button resets the video input to None, which can trigger this
    # handler again. There is nothing to copy then, so restore the initial
    # state instead of passing None to shutil.copy.
    if not video_path:
        return [
            gr.update(label="Upload video"),    # video_input
            gr.update(visible=False),           # config_block
            gr.update(value="")                 # original_video_path
        ]

    # Create videos folder
    videos_folder = "videos"
    os.makedirs(videos_folder, exist_ok=True)

    # Copy uploaded video to videos folder (always under the same file name)
    new_video_path = os.path.join(videos_folder, "download_video.mp4")
    shutil.copy(video_path, new_video_path)

    # Relabel the input, reveal the config block and store the new video path
    return [
        gr.update(label="Video uploaded"),      # video_input
        gr.update(visible=True),                # config_block
        gr.update(value=new_video_path)         # original_video_path
    ]
|
| 290 |
+
|
| 291 |
@spaces.GPU
|
| 292 |
def subtify():
|
| 293 |
with gr.Blocks(
|
|
|
|
| 337 |
gr.HTML(html_social_media)
|
| 338 |
gr.HTML("<h1 style='text-align: center;'>Subtify</h1>")
|
| 339 |
gr.HTML(html_subtify_logo)
|
| 340 |
+
|
| 341 |
+
# Input block, where the user can upload a video and configure the subtify process
|
|
|
|
|
|
|
|
|
|
| 342 |
visible = False
|
| 343 |
+
input_block = gr.Row(variant="panel")
|
| 344 |
+
with input_block:
|
| 345 |
+
input_video_block = gr.Row(scale=2)
|
| 346 |
+
with input_video_block:
|
| 347 |
+
video_input = gr.Video(
|
| 348 |
+
label="Upload video",
|
| 349 |
+
sources=["upload"],
|
| 350 |
+
scale=1,
|
| 351 |
+
interactive=True
|
| 352 |
+
)
|
| 353 |
+
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="clear", min_width="10px", scale=0)
|
| 354 |
+
|
| 355 |
+
config_block = gr.Column(scale=1, visible=visible)
|
| 356 |
+
with config_block:
|
| 357 |
with gr.Row():
|
| 358 |
+
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
|
| 359 |
+
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
|
| 360 |
+
with gr.Accordion("Advanced settings", open=False, visible=True) as Advanced_setings:
|
| 361 |
+
number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
|
| 362 |
+
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
|
| 363 |
|
| 364 |
+
auxiliar_block1 = gr.Textbox(placeholder="", interactive=False, visible=visible)
|
| 365 |
with gr.Row():
|
| 366 |
+
get_audio_from_video_info = gr.Textbox(placeholder="Waiting", label="Get audio from video info", elem_id="get_audio_from_video_info", interactive=False, visible=visible)
|
|
|
|
| 367 |
video_transcribed_progress_info = gr.Textbox(placeholder="Waiting", label="Transcribe progress info", elem_id="video_transcribed_progress_info", interactive=False, visible=visible)
|
| 368 |
transcriptions_concatenated_progress_info = gr.Textbox(placeholder="Waiting", label="Concatenate progress info", elem_id="transcriptions_concatenated_progress_info", interactive=False, visible=visible)
|
| 369 |
video_translated_progress_info = gr.Textbox(placeholder="Waiting", label="Translate progress info", elem_id="transcription_translated_progress_info", interactive=False, visible=visible)
|
| 370 |
video_subtitled_progress_info = gr.Textbox(placeholder="Waiting", label="Video subtitle progress info", elem_id="video_subtitled_progress_info", interactive=False, visible=visible)
|
| 371 |
|
| 372 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=visible)
|
| 373 |
+
original_video_path = gr.Textbox(label="Original video path", visible=visible)
|
| 374 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=visible)
|
| 375 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=visible)
|
| 376 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=visible)
|
| 377 |
auxiliar_block3 = gr.Textbox(placeholder="Waiting", label="Auxiliar block 3", elem_id="auxiliar_block3", interactive=False, visible=visible)
|
| 378 |
|
| 379 |
+
gr.HTML(html_buy_me_a_coffe)
|
| 380 |
+
|
| 381 |
# Events
|
| 382 |
# paste_button.click(fn=paste_url_from_clipboard, outputs=url_textbox)
|
| 383 |
delete_button.click(
|
| 384 |
fn=reset_frontend,
|
| 385 |
outputs=[
|
| 386 |
+
video_input,
|
|
|
|
| 387 |
source_languaje,
|
| 388 |
target_languaje,
|
| 389 |
Advanced_setings,
|
| 390 |
number_of_speakers,
|
| 391 |
subtify_button,
|
| 392 |
+
auxiliar_block1,
|
|
|
|
|
|
|
| 393 |
video_transcribed_progress_info,
|
| 394 |
transcriptions_concatenated_progress_info,
|
| 395 |
video_translated_progress_info,
|
|
|
|
| 397 |
subtitled_video,
|
| 398 |
]
|
| 399 |
)
|
| 400 |
+
video_input.change(
|
| 401 |
+
fn=process_uploaded_video,
|
| 402 |
+
inputs=[video_input],
|
| 403 |
+
outputs=[video_input, config_block, original_video_path]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
)
|
| 405 |
subtify_button.click(
|
| 406 |
fn=change_visibility_texboxes,
|
| 407 |
+
outputs=[auxiliar_block1, get_audio_from_video_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
)
|
| 409 |
+
auxiliar_block1.change(
|
| 410 |
+
fn=get_audio,
|
| 411 |
+
inputs=[original_video_path],
|
| 412 |
+
outputs=[get_audio_from_video_info, original_audio_path]
|
| 413 |
)
|
| 414 |
+
get_audio_from_video_info.change(
|
| 415 |
fn=trascribe_audio,
|
| 416 |
+
inputs=[original_audio_path, source_languaje],
|
| 417 |
outputs=[video_transcribed_progress_info]
|
| 418 |
)
|
| 419 |
+
# video_transcribed_progress_info.change(
|
| 420 |
+
# fn=concatenate_transcriptions,
|
| 421 |
+
# outputs=[transcriptions_concatenated_progress_info, original_audio_transcribed_path]
|
| 422 |
+
# )
|
| 423 |
+
# transcriptions_concatenated_progress_info.change(
|
| 424 |
+
# fn=translate_transcription,
|
| 425 |
+
# inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
|
| 426 |
+
# outputs=[video_translated_progress_info, original_audio_translated_path]
|
| 427 |
+
# )
|
| 428 |
+
# video_translated_progress_info.change(
|
| 429 |
+
# fn=add_translated_subtitles_to_video,
|
| 430 |
+
# inputs=[original_video_path, original_audio_path, original_audio_translated_path],
|
| 431 |
+
# outputs=[subtitled_video, video_subtitled_progress_info, auxiliar_block3]
|
| 432 |
+
# )
|
| 433 |
+
# auxiliar_block3.change(
|
| 434 |
+
# fn=hide_textbobes_progress_info,
|
| 435 |
+
# outputs=[video_sliced_progress_info, video_transcribed_progress_info, transcriptions_concatenated_progress_info, video_translated_progress_info, video_subtitled_progress_info]
|
| 436 |
+
# )
|
|
|
|
|
|
|
| 437 |
|
| 438 |
demo.launch()
|
| 439 |
|
audio.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import ffmpeg
|
| 3 |
+
|
| 4 |
+
DEBUG=True
|
| 5 |
+
|
| 6 |
+
def get_audio_from_video(video_path: str, output_folder: str) -> str:
    """
    Extract audio from video and save it as mp3.

    The audio is always written to "download_audio.mp3" inside the output
    folder. When DEBUG is enabled and that file already exists, it is returned
    as is, without running ffmpeg again.

    Args:
        video_path (str): Path to the video file
        output_folder (str): Path to folder where audio will be saved

    Returns:
        str: Path to the saved audio file

    Raises:
        FileNotFoundError: If video file doesn't exist
        Exception: If there's an error extracting the audio (the original
            error is chained as the cause)
    """
    # Validate video exists
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Generate output path
    audio_path = os.path.join(output_folder, "download_audio.mp3")

    # Debug shortcut: reuse a previously extracted file
    if DEBUG and os.path.exists(audio_path):
        return audio_path

    try:
        # Extract audio using ffmpeg
        stream = ffmpeg.input(video_path)
        stream = ffmpeg.output(stream, audio_path, acodec='libmp3lame')
        ffmpeg.run(stream, overwrite_output=True)
    except Exception as e:
        # Keep the generic exception type callers rely on, but preserve the cause
        raise Exception(f"Error extracting audio from video: {str(e)}") from e

    return audio_path
|
requirements.txt
CHANGED
|
@@ -1,20 +1,31 @@
|
|
| 1 |
# gradio
|
| 2 |
gradio
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Download youtube and twitch videos
|
| 8 |
-
pytube
|
| 9 |
-
yt-dlp
|
| 10 |
-
twitch-dl
|
| 11 |
|
| 12 |
# Trascribe audios
|
| 13 |
-
git+https://github.com/m-bain/whisperx.git
|
| 14 |
pyannote.audio
|
| 15 |
|
| 16 |
-
# Translate
|
| 17 |
-
protobuf
|
| 18 |
|
| 19 |
-
# Add subtitles to videos
|
| 20 |
-
opencv-python
|
|
|
|
| 1 |
# gradio
|
| 2 |
gradio
|
| 3 |
|
| 4 |
+
# spaces
|
| 5 |
+
spaces
|
| 6 |
+
|
| 7 |
+
# pytorch
|
| 8 |
+
torch
|
| 9 |
+
torchvision
|
| 10 |
+
torchaudio
|
| 11 |
+
|
| 12 |
+
# Transformers
|
| 13 |
+
transformers accelerate
|
| 14 |
+
|
| 15 |
+
# ffmpeg
|
| 16 |
+
ffmpeg-python
|
| 17 |
|
| 18 |
# Download youtube and twitch videos
|
| 19 |
+
# pytube
|
| 20 |
+
# yt-dlp
|
| 21 |
+
# twitch-dl
|
| 22 |
|
| 23 |
# Transcribe audios
|
| 24 |
+
# git+https://github.com/m-bain/whisperx.git
|
| 25 |
pyannote.audio
|
| 26 |
|
| 27 |
+
# # Translate
|
| 28 |
+
# protobuf
|
| 29 |
|
| 30 |
+
# # Add subtitles to videos
|
| 31 |
+
# opencv-python
|
slice_audio.py
CHANGED
|
@@ -4,6 +4,7 @@ from tqdm import tqdm
|
|
| 4 |
|
| 5 |
START = 00
|
| 6 |
FOLDER = "chunks"
|
|
|
|
| 7 |
|
| 8 |
def seconds_to_hms(seconds):
|
| 9 |
hour = 00
|
|
@@ -22,53 +23,72 @@ def seconds_to_hms(seconds):
|
|
| 22 |
def hms_to_seconds(hour, minute, second):
|
| 23 |
return hour*3600 + minute*60 + second
|
| 24 |
|
| 25 |
-
def
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
name, extension = os.path.splitext(filename)
|
| 30 |
-
seconds = int(args.seconds)
|
| 31 |
|
| 32 |
# Get audio duration in seconds
|
| 33 |
-
duration = float(os.popen(f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 {
|
| 34 |
hour, minute, second = seconds_to_hms(int(duration))
|
|
|
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
# Slice audio into
|
| 40 |
-
hour, minute, second = seconds_to_hms(seconds) # Duration of each chunk
|
| 41 |
output_files = []
|
| 42 |
progress_bar = tqdm(total=num_chunks, desc="Slice audio into chunks progress")
|
|
|
|
| 43 |
for chunk in range(num_chunks):
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
else:
|
| 57 |
-
command = f'ffmpeg -i {
|
|
|
|
|
|
|
| 58 |
os.system(command)
|
| 59 |
-
|
| 60 |
output_files.append(output)
|
| 61 |
-
|
| 62 |
progress_bar.update(1)
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
for output_file in output_files:
|
| 67 |
f.write(f"{output_file}\n")
|
| 68 |
-
|
| 69 |
-
if __name__ == "__main__":
|
| 70 |
-
argparser = argparse.ArgumentParser(description='Slice audio into smaller chunks')
|
| 71 |
-
argparser.add_argument('input', help='Input audio file')
|
| 72 |
-
argparser.add_argument('seconds', help='Duration of each chunk in seconds')
|
| 73 |
-
args = argparser.parse_args()
|
| 74 |
-
main(args)
|
|
|
|
| 4 |
|
| 5 |
START = 00
|
| 6 |
FOLDER = "chunks"
|
| 7 |
+
DEBUG = True
|
| 8 |
|
| 9 |
def seconds_to_hms(seconds):
|
| 10 |
hour = 00
|
|
|
|
| 23 |
def hms_to_seconds(hour, minute, second):
    """Return the total number of seconds in the given hours, minutes and seconds."""
    seconds_from_hours = hour*3600
    seconds_from_minutes = minute*60
    return seconds_from_hours + seconds_from_minutes + second
|
| 25 |
|
| 26 |
+
def slice_audio(input_audio_path, output_folder, chunks_seconds, chunk_overlap_seconds):
    """
    Slice audio into chunks with specified duration and overlap.

    Consecutive chunks start (chunks_seconds - chunk_overlap_seconds) seconds
    apart. The chunk paths are also written, one per line, to
    "output_files.txt" inside the output folder. When DEBUG is enabled, chunks
    that already exist on disk are reused instead of being generated again.

    Args:
        input_audio_path (str): Path to input audio file
        output_folder (str): Path to output folder
        chunks_seconds (int): Duration of each chunk in seconds
        chunk_overlap_seconds (int): Overlap between chunks in seconds

    Raises:
        ValueError: If the overlap is not smaller than the chunk duration, or
            if the duration of the input cannot be read with ffprobe
    """
    # Imported here so the function does not rely on the module's import block
    import subprocess

    # Calculate effective chunk duration considering overlap, and validate it
    # before doing any work on the file
    effective_chunk = chunks_seconds - chunk_overlap_seconds
    if effective_chunk <= 0:
        raise ValueError("Overlap duration must be less than chunk duration")

    _, filename = os.path.split(input_audio_path)
    name, extension = os.path.splitext(filename)

    # Get audio duration in seconds. Arguments are passed as a list (no shell),
    # so paths with spaces or shell metacharacters are handled safely.
    probe_command = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
        input_audio_path,
    ]
    try:
        probe = subprocess.run(probe_command, stdout=subprocess.PIPE, text=True, check=False)
        duration = float(probe.stdout)
    except (OSError, ValueError) as error:
        raise ValueError(f"Could not read the duration of {input_audio_path} with ffprobe") from error
    hour, minute, second = seconds_to_hms(int(duration))
    print(f"\tDuration ({duration} seconds): {hour:02d}:{minute:02d}:{second:02d}")

    # Calculate number of chunks needed (ceiling division). At least one chunk
    # is produced, so audio no longer than the overlap is not silently dropped.
    num_chunks = max(1, -(-int(duration - chunk_overlap_seconds) // effective_chunk))

    # Slice audio into chunks with overlap
    output_files = []
    progress_bar = tqdm(total=num_chunks, desc="Slice audio into chunks progress")

    for chunk in range(num_chunks):
        # Calculate start time for this chunk and convert it to HH:MM:SS format
        start_time = chunk * effective_chunk
        hour_start, minute_start, second_start = seconds_to_hms(start_time)

        # Generate output filename
        output = f"{output_folder}/{name}_chunk{chunk:003d}{extension}"

        if DEBUG and os.path.exists(output):
            output_files.append(output)
            progress_bar.update(1)
            continue

        # Build ffmpeg command with -y flag to overwrite without asking
        command = [
            'ffmpeg', '-y', '-i', input_audio_path,
            '-ss', f'{hour_start:02d}:{minute_start:02d}:{second_start:02d}',
        ]
        if chunk != num_chunks - 1:
            # Every chunk but the last gets an explicit duration; the last one
            # simply runs until the end of the input
            end_time = min(start_time + chunks_seconds, duration)
            chunk_duration = end_time - start_time
            hour_duration, minute_duration, second_duration = seconds_to_hms(chunk_duration)
            command += ['-t', f'{hour_duration:02d}:{minute_duration:02d}:{second_duration:02d}']
        command += ['-loglevel', 'error', output]

        # Execute command (a non-zero exit status is not treated as an error)
        subprocess.run(command, check=False)
        output_files.append(output)
        progress_bar.update(1)

    progress_bar.close()

    # Write output files to a txt file (with overwrite)
    with open(f"{output_folder}/output_files.txt", "w") as f:
        for output_file in output_files:
            f.write(f"{output_file}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
transcribe.py
CHANGED
|
@@ -2,50 +2,86 @@ import os
|
|
| 2 |
import argparse
|
| 3 |
from lang_list import LANGUAGE_NAME_TO_CODE, WHISPER_LANGUAGES
|
| 4 |
from tqdm import tqdm
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
# For pyannote.audio diarize
|
| 7 |
-
from pyannote.audio import Model
|
| 8 |
-
model = Model.from_pretrained("pyannote/segmentation-3.0", use_auth_token="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")
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
def transcribe(audio_file, language,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
output_folder = "transcriptions"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
model = "large-v2"
|
| 29 |
-
# word_timestamps = True
|
| 30 |
-
print_progress = False
|
| 31 |
-
if device == "cpu":
|
| 32 |
-
# I supose that I am on huggingface server
|
| 33 |
-
compute_type = "float32"
|
| 34 |
-
else:
|
| 35 |
-
compute_type = "float16"
|
| 36 |
-
fp16 = True
|
| 37 |
-
batch_size = 8
|
| 38 |
-
verbose = False
|
| 39 |
-
min_speakers = 1
|
| 40 |
-
max_speakers = num_speakers
|
| 41 |
-
threads = 4
|
| 42 |
-
output_format = "srt"
|
| 43 |
-
hf_token = "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"
|
| 44 |
-
command = f'whisperx {audio_file} --model {model} --batch_size {batch_size} --compute_type {compute_type} \
|
| 45 |
-
--output_dir {output_folder} --output_format {output_format} --verbose {verbose} --language {language} \
|
| 46 |
-
--fp16 {fp16} --threads {threads} --print_progress {print_progress} --device {device} \
|
| 47 |
-
--diarize --max_speakers {max_speakers} --min_speakers {min_speakers} --hf_token {hf_token}'
|
| 48 |
-
os.system(command)
|
| 49 |
|
| 50 |
if __name__ == "__main__":
|
| 51 |
parser = argparse.ArgumentParser(description='Transcribe audio files')
|
|
@@ -66,5 +102,6 @@ if __name__ == "__main__":
|
|
| 66 |
_, input_name = input_file.split('/')
|
| 67 |
extension = "mp3"
|
| 68 |
file = f'{chunks_folder}/{input_name}.{extension}'
|
|
|
|
| 69 |
transcribe(file, language_dict[args.language]["transcriber"], args.num_speakers, args.device)
|
| 70 |
-
progress_bar.update(1)
|
|
|
|
| 2 |
import argparse
|
| 3 |
from lang_list import LANGUAGE_NAME_TO_CODE, WHISPER_LANGUAGES
|
| 4 |
from tqdm import tqdm
|
| 5 |
+
import torch
|
| 6 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
def get_language_dict():
    """
    Map language names to their transcriber and translator codes.

    For every entry of LANGUAGE_NAME_TO_CODE, the part of the code before the
    first underscore is lower-cased; the language is kept only when that short
    code is in WHISPER_LANGUAGES.

    Returns:
        dict: language name -> {"transcriber": short code, "translator": full code}
    """
    named_codes = (
        (language_name, language_code, language_code.split('_')[0].lower())
        for language_name, language_code in LANGUAGE_NAME_TO_CODE.items()
    )
    return {
        language_name: {
            "transcriber": short_code,
            "translator": language_code
        }
        for language_name, language_code, short_code in named_codes
        if short_code in WHISPER_LANGUAGES
    }
|
| 24 |
|
| 25 |
+
def transcribe(audio_file, language, device, chunk_length_s=30, stride_length_s=5):
    """
    Transcribe audio file using Whisper model.

    The "transcriptions" folder is created if needed. The pipeline result is
    printed and returned.

    Args:
        audio_file (str): Path to audio file
        language (str): Language code for transcription
        device (str): Device to use for inference ('cuda' or 'cpu')
        chunk_length_s (int): Length of audio chunks in seconds
        stride_length_s (int): Stride length between chunks in seconds

    Returns:
        The raw result returned by the speech recognition pipeline
    """
    output_folder = "transcriptions"
    os.makedirs(output_folder, exist_ok=True)

    # Get output filename
    # NOTE(review): output_file is computed but nothing is written to it here yet
    audio_filename = os.path.basename(audio_file)
    filename_without_ext = os.path.splitext(audio_filename)[0]
    output_file = os.path.join(output_folder, f"{filename_without_ext}.srt")

    # Use half precision only when the model actually runs on CUDA; choosing it
    # from CUDA availability alone would also select float16 for device="cpu"
    runs_on_cuda = torch.cuda.is_available() and str(device).startswith("cuda")
    torch_dtype = torch.float16 if runs_on_cuda else torch.float32

    # Load model and processor
    model_id = "openai/whisper-large-v3-turbo"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    # Create pipeline with timestamp generation
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=True
    )

    # Transcribe with timestamps
    result = pipe(
        audio_file,
        return_timestamps=True,
        generate_kwargs={
            "language": language,
            "task": "transcribe",
            "use_cache": True,
            "num_beams": 1
        }
    )

    print(result)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
if __name__ == "__main__":
|
| 87 |
parser = argparse.ArgumentParser(description='Transcribe audio files')
|
|
|
|
| 102 |
_, input_name = input_file.split('/')
|
| 103 |
extension = "mp3"
|
| 104 |
file = f'{chunks_folder}/{input_name}.{extension}'
|
| 105 |
+
language_dict = get_language_dict()
|
| 106 |
transcribe(file, language_dict[args.language]["transcriber"], args.num_speakers, args.device)
|
| 107 |
+
progress_bar.update(1)
|
ui_config.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Color definitions
# NOTE(review): "BODER" in PRIMARY_BODER_COLOR looks like a typo for "BORDER";
# the name is kept as-is because other modules may import it.
BACKGROUND_COLOR = "#0b0f19"
BUTTON_COLOR = "#47515f"
SVG_COLOR = "#f3f4f6"
PANEL_COLOR = "#101827"
PRIMARY_TEXT_COLOR = "#f3f4f6"
SUBDUED_TEXT_COLOR = "#59616f"
BACKGROUND_PRIMARY_COLOR = "#1f2937"
BACKGROUND_SECONDARY_COLOR = "#101827"
PRIMARY_BODER_COLOR = "#323c4c"
BLOCK_TITLE_TEXT_COLOR = "#dfe2e6"
INPUT_BACKGROUND_COLOR = "#2f3947"
INPUT_BORDER_COLOR = "#313b4b"
INPUT_PLACEHOLDER_COLOR = "#616977"
ERROR_BACKGROUND_COLOR = "#101827"
ERROR_TEXT_COLOR = "#f7f2f2"
ERROR_BORDER_COLOR = "#9b3339"
BUTTON_SECONDARY_BACKGROUND_COLOR = "#434d5c"
BUTTON_SECONDARY_BORDER_COLOR = "#444d5b"
BUTTON_SECONDARY_TEXT_COLOR = "#c5c9cc"
RED = "#ff0000"
GREEN = "#00ff00"
BLUE = "#0000ff"

# HTML for the social media links.
# The doubled braces are f-string escapes for the literal CSS braces; the only
# interpolated value is SVG_COLOR, used as the icon fill.
html_social_media = f'''
<div style="float: right;">
<a href="https://maximofn.com/" rel="noopener noreferrer" aria-disabled="false" class="sm secondary svelte-cmf5ev" id="component-1" style="flex-grow: 100;" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" height="1em" viewBox="0 0 576 512">
<style>
svg {{
fill: {SVG_COLOR}
}}
</style>
<path d="M208 80c0-26.5 21.5-48 48-48h64c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48h-8v40H464c30.9 0 56 25.1 56 56v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H464c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-4.4-3.6-8-8-8H312v40h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H256c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V280H112c-4.4 0-8 3.6-8 8v32h8c26.5 0 48 21.5 48 48v64c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V368c0-26.5 21.5-48 48-48h8V288c0-30.9 25.1-56 56-56H264V192h-8c-26.5 0-48-21.5-48-48V80z"/>
</svg>
</a>
<!-- Resto de los enlaces de redes sociales... -->
</div>
'''
|
| 41 |
+
|
| 42 |
+
# HTML for the logo
def get_html_subtify_logo(new_width, new_height):
    """Return the HTML snippet that shows the Subtify logo, centered.

    Args:
        new_width: Value interpolated into the image's ``width`` attribute
            (the template appends ``px``).
        new_height: Value interpolated into the image's ``height`` attribute
            (the template appends ``px``).

    Returns:
        A string holding a flex-centered ``<div>`` that wraps the logo ``<img>``.
    """
    logo_html = f"""
<div style="display: flex; justify-content: center; align-items: center;">
<img src='https://pub-fb664c455eca46a2ba762a065ac900f7.r2.dev/subtify_logo-scaled.webp' width={new_width}px height={new_height}px >
</div>
"""
    return logo_html
|
| 49 |
+
|
| 50 |
+
# HTML for the "Buy Me a Coffee" button: a right-floated badge image that
# links to the author's Buy Me a Coffee page in a new tab.
html_buy_me_a_coffe = """
<div style="float: right;">
<a href="https://www.buymeacoffee.com/maximofn" target="_blank">
<img src="https://img.shields.io/badge/Buy_Me_A_Coffee-support_my_work-FFDD00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=white&labelColor=101010" alt="buy me a coffe">
</a>
</div>
"""
|
url_manager.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from pytube import YouTube
|
| 4 |
+
from lang_list import union_language_dict
|
| 5 |
+
|
| 6 |
+
language_dict = union_language_dict()
|
| 7 |
+
|
| 8 |
+
def get_youtube_thumbnail(url):
    """Return the thumbnail URL pytube reports for a video, or None on failure.

    Args:
        url: Address of the YouTube video.

    Returns:
        The value of ``YouTube(url).thumbnail_url``, or ``None`` when pytube
        raises while building the object or resolving the thumbnail. The
        caller in this module tests the result for truthiness and shows a
        placeholder image when it is falsy.
    """
    import logging

    try:
        return YouTube(url).thumbnail_url
    except Exception:
        # Deliberately broad: this is a best-effort lookup used only to
        # decorate the UI, and the set of exceptions pytube can raise here
        # (its own errors, network errors, ...) is not enumerated in this
        # file. The failure is logged rather than silently swallowed.
        logging.getLogger(__name__).warning(
            "Could not retrieve YouTube thumbnail for %r", url, exc_info=True
        )
        return None
|
| 12 |
+
|
| 13 |
+
def is_valid_youtube_url(url):
    """Report whether *url* starts with a supported YouTube video address.

    Accepted shapes (the scheme and ``www.`` are optional):
      - https://youtube.com/watch?v=video_id
      - https://www.youtube.com/watch?v=video_id
      - https://youtu.be/video_id

    The pattern is anchored at the start only, so whatever follows the video
    id (for example extra query parameters) is ignored.

    Args:
        url: Candidate address. Non-string or empty values are rejected.

    Returns:
        True when the beginning of *url* matches one of the shapes above,
        False otherwise.
    """
    # re.match raises TypeError on None / non-string input; treat such input
    # as "not a valid URL" instead of crashing the caller.
    if not isinstance(url, str) or not url:
        return False
    youtube_pattern = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
    return re.match(youtube_pattern, url) is not None
|
| 20 |
+
|
| 21 |
+
def is_valid_twitch_url(url):
    """Report whether *url* starts with a supported Twitch address.

    Accepted shapes (the scheme and ``www.`` are optional):
      - https://twitch.tv/channel_name
      - https://www.twitch.tv/channel_name
      - https://twitch.tv/videos/video_id

    The pattern is anchored at the start only, so trailing text after the
    channel name or video id is ignored.

    Args:
        url: Candidate address. Non-string or empty values are rejected.

    Returns:
        True when the beginning of *url* matches one of the shapes above,
        False otherwise.
    """
    # re.match raises TypeError on None / non-string input; treat such input
    # as "not a valid URL" instead of crashing the caller.
    if not isinstance(url, str) or not url:
        return False
    twitch_pattern = r'(https?://)?(www\.)?twitch\.tv/(videos/\d+|\w+)'
    return re.match(twitch_pattern, url) is not None
|
| 28 |
+
|
| 29 |
+
def _build_url_controls(visible):
    """Create the five option widgets, all sharing one visibility flag.

    Returns them in the order is_valid_url emits them after the image:
    source-language dropdown, target-language dropdown, advanced-settings
    accordion, number-of-speakers dropdown, and the "subtify" button.
    """
    num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
    source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
    target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
    advanced_setings = gr.Accordion(visible=visible)
    number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
    subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
    return (
        source_languaje,
        target_languaje,
        advanced_setings,
        number_of_speakers,
        subtify_button,
    )


def is_valid_url(url):
    """Build the UI components that correspond to the URL typed by the user.

    For a recognised YouTube URL the image is the video thumbnail (or a
    placeholder asset when no thumbnail is available); for a recognised
    Twitch URL it is the Twitch asset. In both cases every returned component
    is visible. For anything else the error image and all controls are
    returned with ``visible=False``.

    Args:
        url: Text to inspect. A non-string value (e.g. None) is treated as an
            unrecognised URL instead of raising AttributeError.

    Returns:
        A 6-tuple: (image, source-language dropdown, target-language
        dropdown, advanced-settings accordion, number-of-speakers dropdown,
        subtify button).
    """
    lowered = url.lower() if isinstance(url, str) else ""

    # Stays None unless the URL is recognised as valid by one of the platforms.
    image_value = None
    if "youtube" in lowered or "youtu.be" in lowered:
        if is_valid_youtube_url(url):
            # Fall back to the placeholder when no thumbnail could be obtained.
            image_value = get_youtube_thumbnail(url) or "assets/youtube-no-thumbnails.webp"
    elif "twitch" in lowered:
        # "twitch.tv" always contains "twitch", so one substring test suffices.
        if is_valid_twitch_url(url):
            image_value = "assets/twitch.webp"

    visible = image_value is not None
    if not visible:
        image_value = "assets/youtube_error.webp"

    image = gr.Image(value=image_value, visible=visible, show_download_button=False, container=False)
    return (image, *_build_url_controls(visible))
|