Change UI
Browse files- app.py +80 -86
- requirements.txt +0 -11
- transcribe.py +5 -1
app.py
CHANGED
|
@@ -26,7 +26,7 @@ if DEVICE == "cpu":
|
|
| 26 |
# I suppose that I am on huggingface server
|
| 27 |
SECONDS = 300
|
| 28 |
else:
|
| 29 |
-
SECONDS =
|
| 30 |
|
| 31 |
YOUTUBE = "youtube"
|
| 32 |
TWITCH = "twitch"
|
|
@@ -231,8 +231,9 @@ def clear_video_url():
|
|
| 231 |
image = gr.Image(visible=visible, scale=1)
|
| 232 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 233 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
|
|
|
|
|
|
| 234 |
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
| 235 |
-
original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
|
| 236 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
| 237 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
| 238 |
return (
|
|
@@ -240,8 +241,8 @@ def clear_video_url():
|
|
| 240 |
image,
|
| 241 |
source_languaje,
|
| 242 |
target_languaje,
|
|
|
|
| 243 |
translate_button,
|
| 244 |
-
original_audio,
|
| 245 |
original_audio_transcribed,
|
| 246 |
original_audio_translated,
|
| 247 |
)
|
|
@@ -260,10 +261,9 @@ def is_valid_youtube_url(url):
|
|
| 260 |
def is_valid_url(url):
|
| 261 |
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 262 |
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
|
| 267 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
|
| 268 |
|
| 269 |
# Youtube
|
|
@@ -275,24 +275,18 @@ def is_valid_url(url):
|
|
| 275 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
| 276 |
source_languaje,
|
| 277 |
target_languaje,
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
original_audio_transcribed,
|
| 282 |
-
original_audio_translated,
|
| 283 |
-
subtitled_video
|
| 284 |
)
|
| 285 |
else:
|
| 286 |
return (
|
| 287 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
| 288 |
source_languaje,
|
| 289 |
target_languaje,
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
original_audio_transcribed,
|
| 294 |
-
original_audio_translated,
|
| 295 |
-
subtitled_video
|
| 296 |
)
|
| 297 |
|
| 298 |
# Twitch
|
|
@@ -301,12 +295,9 @@ def is_valid_url(url):
|
|
| 301 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
| 302 |
source_languaje,
|
| 303 |
target_languaje,
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
original_audio_transcribed,
|
| 308 |
-
original_audio_translated,
|
| 309 |
-
subtitled_video
|
| 310 |
)
|
| 311 |
|
| 312 |
# Error
|
|
@@ -314,49 +305,40 @@ def is_valid_url(url):
|
|
| 314 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
| 315 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 316 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
| 321 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
| 322 |
-
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
| 323 |
return (
|
| 324 |
image,
|
| 325 |
source_languaje,
|
| 326 |
target_languaje,
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
original_audio_transcribed,
|
| 331 |
-
original_audio_translated,
|
| 332 |
-
subtitled_video
|
| 333 |
)
|
| 334 |
|
| 335 |
-
def get_audio_and_video_from_video(url
|
| 336 |
python_file = "download.py"
|
| 337 |
command = f"python {python_file} {url}"
|
| 338 |
os.system(command)
|
| 339 |
-
|
| 340 |
|
| 341 |
audio = "audios/download_audio.mp3"
|
| 342 |
video = "videos/download_video.mp4"
|
| 343 |
|
| 344 |
return (
|
| 345 |
-
gr.
|
| 346 |
gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
|
| 347 |
gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
|
| 348 |
)
|
| 349 |
|
| 350 |
-
def
|
| 351 |
folder_vocals = "vocals"
|
| 352 |
folder_chunck = "chunks"
|
| 353 |
-
folder_concatenated = "concatenated_transcriptions"
|
| 354 |
if not os.path.exists(folder_vocals):
|
| 355 |
os.makedirs(folder_vocals)
|
| 356 |
if not os.path.exists(folder_chunck):
|
| 357 |
os.makedirs(folder_chunck)
|
| 358 |
-
if not os.path.exists(folder_concatenated):
|
| 359 |
-
os.makedirs(folder_concatenated)
|
| 360 |
python_file = "slice_audio.py"
|
| 361 |
command = f"python {python_file} {audio_path} {SECONDS}"
|
| 362 |
os.system(command)
|
|
@@ -365,7 +347,14 @@ def trascribe_audio(audio_path, source_languaje):
|
|
| 365 |
f.write(str(0))
|
| 366 |
command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
|
| 367 |
os.system(command)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
|
|
|
|
|
|
| 369 |
python_file = "transcribe.py"
|
| 370 |
chunck_file = "chunks/output_files.txt"
|
| 371 |
speakers_file = "vocals/speakers.txt"
|
|
@@ -393,6 +382,16 @@ def trascribe_audio(audio_path, source_languaje):
|
|
| 393 |
command = f"rm {vocal}"
|
| 394 |
os.system(command)
|
| 395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
python_file = "concat_transcriptions.py"
|
| 397 |
command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
|
| 398 |
os.system(command)
|
|
@@ -411,8 +410,8 @@ def trascribe_audio(audio_path, source_languaje):
|
|
| 411 |
result = f.read()
|
| 412 |
|
| 413 |
return (
|
| 414 |
-
|
| 415 |
-
gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
| 416 |
)
|
| 417 |
|
| 418 |
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
|
|
@@ -431,7 +430,7 @@ def translate_transcription(original_audio_transcribed_path, source_languaje, ta
|
|
| 431 |
os.system(command)
|
| 432 |
|
| 433 |
return (
|
| 434 |
-
|
| 435 |
gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
| 436 |
)
|
| 437 |
|
|
@@ -458,56 +457,52 @@ def add_translated_subtitles_to_video(original_video_path, original_audio_path,
|
|
| 458 |
def subtify():
|
| 459 |
with gr.Blocks() as demo:
|
| 460 |
# Layout
|
|
|
|
| 461 |
gr.Markdown("""# Subtify""")
|
| 462 |
-
gr.Markdown(f"translate, Python: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
|
| 463 |
-
# model = transformers.AutoModel.from_pretrained("huggingface/my_model")
|
| 464 |
-
# gr.Markdown(f"model.config.url: {model.config.url}")
|
| 465 |
-
token = os.getenv("HF_TOKEN")
|
| 466 |
-
if token is not None:
|
| 467 |
-
print(token)
|
| 468 |
-
gr.Markdown(f"Huggingface token: {token}")
|
| 469 |
-
else:
|
| 470 |
-
gr.Markdown(f"Huggingface token: None")
|
| 471 |
with gr.Row(variant="panel"):
|
| 472 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
| 473 |
-
copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
| 474 |
-
delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
| 475 |
|
| 476 |
-
stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
|
| 477 |
visible = False
|
| 478 |
with gr.Row(equal_height=False):
|
| 479 |
image = gr.Image(visible=visible, scale=1)
|
| 480 |
with gr.Column():
|
| 481 |
with gr.Row():
|
| 482 |
-
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 483 |
-
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
|
|
|
| 484 |
with gr.Row():
|
| 485 |
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
| 486 |
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
|
| 489 |
original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
|
| 490 |
-
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
| 491 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
| 492 |
-
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
| 493 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
| 494 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
| 495 |
|
| 496 |
# Events
|
| 497 |
# copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
| 498 |
-
delete_button.click(
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
)
|
| 511 |
url_textbox.change(
|
| 512 |
fn=is_valid_url,
|
| 513 |
inputs=url_textbox,
|
|
@@ -515,18 +510,17 @@ def subtify():
|
|
| 515 |
image,
|
| 516 |
source_languaje,
|
| 517 |
target_languaje,
|
|
|
|
| 518 |
subtify_button,
|
| 519 |
-
|
| 520 |
-
original_audio,
|
| 521 |
-
original_audio_transcribed,
|
| 522 |
-
original_audio_translated,
|
| 523 |
-
subtitled_video
|
| 524 |
]
|
| 525 |
)
|
| 526 |
-
subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
|
|
|
|
|
|
| 530 |
|
| 531 |
demo.launch()
|
| 532 |
|
|
|
|
| 26 |
# I suppose that I am on huggingface server
|
| 27 |
SECONDS = 300
|
| 28 |
else:
|
| 29 |
+
SECONDS = 300
|
| 30 |
|
| 31 |
YOUTUBE = "youtube"
|
| 32 |
TWITCH = "twitch"
|
|
|
|
| 231 |
image = gr.Image(visible=visible, scale=1)
|
| 232 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 233 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 234 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
| 235 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
| 236 |
translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
|
|
|
|
| 237 |
original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
|
| 238 |
original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
|
| 239 |
return (
|
|
|
|
| 241 |
image,
|
| 242 |
source_languaje,
|
| 243 |
target_languaje,
|
| 244 |
+
number_of_speakers,
|
| 245 |
translate_button,
|
|
|
|
| 246 |
original_audio_transcribed,
|
| 247 |
original_audio_translated,
|
| 248 |
)
|
|
|
|
| 261 |
def is_valid_url(url):
|
| 262 |
source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 263 |
target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 264 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
| 265 |
+
number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
| 266 |
+
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
|
|
|
|
| 267 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
|
| 268 |
|
| 269 |
# Youtube
|
|
|
|
| 275 |
gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
|
| 276 |
source_languaje,
|
| 277 |
target_languaje,
|
| 278 |
+
number_of_speakers,
|
| 279 |
+
subtify_button,
|
| 280 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
|
| 281 |
)
|
| 282 |
else:
|
| 283 |
return (
|
| 284 |
gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
|
| 285 |
source_languaje,
|
| 286 |
target_languaje,
|
| 287 |
+
number_of_speakers,
|
| 288 |
+
subtify_button,
|
| 289 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
|
| 290 |
)
|
| 291 |
|
| 292 |
# Twitch
|
|
|
|
| 295 |
gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
|
| 296 |
source_languaje,
|
| 297 |
target_languaje,
|
| 298 |
+
number_of_speakers,
|
| 299 |
+
subtify_button,
|
| 300 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
|
| 301 |
)
|
| 302 |
|
| 303 |
# Error
|
|
|
|
| 305 |
image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
|
| 306 |
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
|
| 307 |
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
|
| 308 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
|
| 309 |
+
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
| 310 |
+
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False),
|
|
|
|
|
|
|
|
|
|
| 311 |
return (
|
| 312 |
image,
|
| 313 |
source_languaje,
|
| 314 |
target_languaje,
|
| 315 |
+
number_of_speakers,
|
| 316 |
+
subtify_button,
|
| 317 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
|
| 318 |
)
|
| 319 |
|
| 320 |
+
def get_audio_and_video_from_video(url):
|
| 321 |
python_file = "download.py"
|
| 322 |
command = f"python {python_file} {url}"
|
| 323 |
os.system(command)
|
| 324 |
+
sleep(5)
|
| 325 |
|
| 326 |
audio = "audios/download_audio.mp3"
|
| 327 |
video = "videos/download_video.mp4"
|
| 328 |
|
| 329 |
return (
|
| 330 |
+
gr.Textbox(value="Ok", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True),
|
| 331 |
gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
|
| 332 |
gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
|
| 333 |
)
|
| 334 |
|
| 335 |
+
def slice_audio(audio_path):
|
| 336 |
folder_vocals = "vocals"
|
| 337 |
folder_chunck = "chunks"
|
|
|
|
| 338 |
if not os.path.exists(folder_vocals):
|
| 339 |
os.makedirs(folder_vocals)
|
| 340 |
if not os.path.exists(folder_chunck):
|
| 341 |
os.makedirs(folder_chunck)
|
|
|
|
|
|
|
| 342 |
python_file = "slice_audio.py"
|
| 343 |
command = f"python {python_file} {audio_path} {SECONDS}"
|
| 344 |
os.system(command)
|
|
|
|
| 347 |
f.write(str(0))
|
| 348 |
command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
|
| 349 |
os.system(command)
|
| 350 |
+
# sleep(5)
|
| 351 |
+
|
| 352 |
+
return (
|
| 353 |
+
gr.Textbox(value="Ok", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
|
| 354 |
+
)
|
| 355 |
|
| 356 |
+
def trascribe_audio(source_languaje):
|
| 357 |
+
folder_vocals = "vocals"
|
| 358 |
python_file = "transcribe.py"
|
| 359 |
chunck_file = "chunks/output_files.txt"
|
| 360 |
speakers_file = "vocals/speakers.txt"
|
|
|
|
| 382 |
command = f"rm {vocal}"
|
| 383 |
os.system(command)
|
| 384 |
|
| 385 |
+
return (
|
| 386 |
+
gr.Textbox(value="Ok", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
|
| 387 |
+
)
|
| 388 |
+
|
| 389 |
+
def concatenate_transcriptions():
|
| 390 |
+
folder_concatenated = "concatenated_transcriptions"
|
| 391 |
+
if not os.path.exists(folder_concatenated):
|
| 392 |
+
os.makedirs(folder_concatenated)
|
| 393 |
+
chunck_file = "chunks/output_files.txt"
|
| 394 |
+
speakers_file = "vocals/speakers.txt"
|
| 395 |
python_file = "concat_transcriptions.py"
|
| 396 |
command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
|
| 397 |
os.system(command)
|
|
|
|
| 410 |
result = f.read()
|
| 411 |
|
| 412 |
return (
|
| 413 |
+
gr.Textbox(value="Ok", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True),
|
| 414 |
+
gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False),
|
| 415 |
)
|
| 416 |
|
| 417 |
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
|
|
|
|
| 430 |
os.system(command)
|
| 431 |
|
| 432 |
return (
|
| 433 |
+
gr.Textbox(value="Ok", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True),
|
| 434 |
gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
| 435 |
)
|
| 436 |
|
|
|
|
| 457 |
def subtify():
|
| 458 |
with gr.Blocks() as demo:
|
| 459 |
# Layout
|
| 460 |
+
num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
| 461 |
gr.Markdown("""# Subtify""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
with gr.Row(variant="panel"):
|
| 463 |
url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
|
| 464 |
+
# copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
|
| 465 |
+
# delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
|
| 466 |
|
|
|
|
| 467 |
visible = False
|
| 468 |
with gr.Row(equal_height=False):
|
| 469 |
image = gr.Image(visible=visible, scale=1)
|
| 470 |
with gr.Column():
|
| 471 |
with gr.Row():
|
| 472 |
+
source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
|
| 473 |
+
target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
|
| 474 |
+
number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
|
| 475 |
with gr.Row():
|
| 476 |
subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
|
| 477 |
|
| 478 |
+
with gr.Row():
|
| 479 |
+
video_donwloaded = gr.Textbox(placeholder="Waiting", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True)
|
| 480 |
+
video_sliced = gr.Textbox(placeholder="Waiting", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
|
| 481 |
+
video_transcribed = gr.Textbox(placeholder="Waiting", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
|
| 482 |
+
video_translated = gr.Textbox(placeholder="Waiting", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True)
|
| 483 |
+
video_subtitled = gr.Textbox(placeholder="Waiting", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True)
|
| 484 |
+
|
| 485 |
original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
|
| 486 |
original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
|
|
|
|
| 487 |
original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
|
|
|
|
| 488 |
original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
|
| 489 |
subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
|
| 490 |
|
| 491 |
# Events
|
| 492 |
# copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
|
| 493 |
+
# delete_button.click(
|
| 494 |
+
# fn=clear_video_url,
|
| 495 |
+
# outputs=[
|
| 496 |
+
# url_textbox,
|
| 497 |
+
# image,
|
| 498 |
+
# source_languaje,
|
| 499 |
+
# target_languaje,
|
| 500 |
+
# number_of_speakers,
|
| 501 |
+
# subtify_button,
|
| 502 |
+
# original_audio_transcribed,
|
| 503 |
+
# original_audio_translated,
|
| 504 |
+
# ]
|
| 505 |
+
# )
|
| 506 |
url_textbox.change(
|
| 507 |
fn=is_valid_url,
|
| 508 |
inputs=url_textbox,
|
|
|
|
| 510 |
image,
|
| 511 |
source_languaje,
|
| 512 |
target_languaje,
|
| 513 |
+
number_of_speakers,
|
| 514 |
subtify_button,
|
| 515 |
+
subtitled_video,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
]
|
| 517 |
)
|
| 518 |
+
subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox], outputs=[video_donwloaded, original_audio_path, original_video_path])
|
| 519 |
+
video_donwloaded.change(fn=slice_audio, inputs=[original_audio_path], outputs=[video_sliced])
|
| 520 |
+
video_sliced.change(fn=trascribe_audio, inputs=[source_languaje], outputs=[video_transcribed])
|
| 521 |
+
video_transcribed.change(fn=concatenate_transcriptions, inputs=[], outputs=[video_translated, original_audio_transcribed_path])
|
| 522 |
+
video_translated.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[video_subtitled, original_audio_translated_path])
|
| 523 |
+
video_subtitled.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
|
| 524 |
|
| 525 |
demo.launch()
|
| 526 |
|
requirements.txt
CHANGED
|
@@ -9,18 +9,7 @@ pytube
|
|
| 9 |
yt-dlp
|
| 10 |
twitch-dl
|
| 11 |
|
| 12 |
-
# # mossformer --> separate speech audios
|
| 13 |
-
# torch
|
| 14 |
-
# torchvision
|
| 15 |
-
# torchaudio
|
| 16 |
-
# speechbrain
|
| 17 |
-
# soundfile
|
| 18 |
-
# modelscope
|
| 19 |
-
# rotary-embedding-torch
|
| 20 |
-
# transformers
|
| 21 |
-
|
| 22 |
# Transcribe audios
|
| 23 |
-
# git+https://github.com/openai/whisper.git
|
| 24 |
git+https://github.com/m-bain/whisperx.git
|
| 25 |
pyannote.audio
|
| 26 |
|
|
|
|
| 9 |
yt-dlp
|
| 10 |
twitch-dl
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# Transcribe audios
|
|
|
|
| 13 |
git+https://github.com/m-bain/whisperx.git
|
| 14 |
pyannote.audio
|
| 15 |
|
transcribe.py
CHANGED
|
@@ -28,7 +28,11 @@ def transcribe(audio_file, language, device, vocals):
|
|
| 28 |
model = "large-v2"
|
| 29 |
# word_timestamps = True
|
| 30 |
print_progress = False
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
fp16 = True
|
| 33 |
batch_size = 8
|
| 34 |
verbose = False
|
|
|
|
| 28 |
model = "large-v2"
|
| 29 |
# word_timestamps = True
|
| 30 |
print_progress = False
|
| 31 |
+
if device == "cpu":
|
| 32 |
+
# I suppose that I am on huggingface server
|
| 33 |
+
compute_type = "float32"
|
| 34 |
+
else:
|
| 35 |
+
compute_type = "float16"
|
| 36 |
fp16 = True
|
| 37 |
batch_size = 8
|
| 38 |
verbose = False
|