caspr

Paused

App Files Files Community

artificialguybr committed on Oct 10, 2023

Commit

2de3a57

1 Parent(s): da316b7

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -21

app.py CHANGED Viewed

@@ -1,39 +1,42 @@
 import gradio as gr
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-from subprocess import run
 from faster_whisper import WhisperModel
 import json
 import tempfile
-import os  # Importando o módulo os
-import ffmpeg
 from zipfile import ZipFile
 import stat
 ZipFile("ffmpeg.zip").extractall()
 st = os.stat('ffmpeg')
 os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
-# Carregar mapeamento de idiomas
 with open('language_codes.json', 'r') as f:
     lang_codes = json.load(f)
-# Inicializar modelos
 tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
 model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
 whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
 def process_video(Video, target_language):
-    print(type(Video))
     audio_file = tempfile.NamedTemporaryFile(suffix=".wav").name
-    print(f"Running FFmpeg command: ffmpeg -i {Video} {audio_file}")
-    run(["ffmpeg", "-i", Video, audio_file])
-    print(f"Checking if temporary file exists: {os.path.exists(audio_file)}")
-    print(f"Checking if video file exists: {os.path.exists(Video)}")
-    # 2. Transcrição
-    segments, _ = whisper_model.transcribe(audio_file, beam_size=5)  # Usando audio_file
     segments = list(segments)
-    # Criar o arquivo .srt com carimbos de tempo
     temp_transcript_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
     with open(temp_transcript_file.name, "w", encoding="utf-8") as f:
         counter = 1
@@ -52,8 +55,7 @@ def process_video(Video, target_language):
             f.write(f"{segment.text}\n\n")
             counter += 1
-    # 3. Tradução
-    flores_code = lang_codes.get(target_language, "eng_Latn")  # Definindo flores_code
     temp_translated_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
     with open(temp_transcript_file.name, "r", encoding="utf-8") as infile, open(temp_translated_file.name, "w", encoding="utf-8") as outfile:
         for line in infile:
@@ -67,15 +69,20 @@ def process_video(Video, target_language):
             else:
                 outfile.write("\n")
-    # 5. Incorporar legenda
-    output_video = "output_video.mp4"  # Definindo output_video
-    run(["ffmpeg", "-i", Video, "-vf", f"subtitles={temp_translated_file.name}", output_video])
     os.unlink(temp_transcript_file.name)
     os.unlink(temp_translated_file.name)
-    return output_video  # Retornando output_video
-# Interface Gradio
 iface = gr.Interface(
     fn=process_video,
     inputs=[
@@ -87,4 +94,4 @@ iface = gr.Interface(
     title="VIDEO TRANSCRIPTION AND TRANSLATION"
 )
-iface.launch()

 import gradio as gr
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from subprocess import run, CalledProcessError
 from faster_whisper import WhisperModel
 import json
 import tempfile
+import os
 from zipfile import ZipFile
 import stat
+def run_command(command):
+    try:
+        run(command, check=True)
+    except CalledProcessError as e:
+        print(f"Command failed with error: {e}")
+        return False
+    return True
 ZipFile("ffmpeg.zip").extractall()
 st = os.stat('ffmpeg')
 os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
 with open('language_codes.json', 'r') as f:
     lang_codes = json.load(f)
 tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
 model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
 whisper_model = WhisperModel("large-v2", device="cuda", compute_type="float16")
 def process_video(Video, target_language):
     audio_file = tempfile.NamedTemporaryFile(suffix=".wav").name
+    if not run_command(["ffmpeg", "-i", Video, audio_file]):
+        print("FFmpeg command failed. Exiting.")
+        return
+    segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
     segments = list(segments)
     temp_transcript_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
     with open(temp_transcript_file.name, "w", encoding="utf-8") as f:
         counter = 1
             f.write(f"{segment.text}\n\n")
             counter += 1
+    flores_code = lang_codes.get(target_language, "eng_Latn")
     temp_translated_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
     with open(temp_transcript_file.name, "r", encoding="utf-8") as infile, open(temp_translated_file.name, "w", encoding="utf-8") as outfile:
         for line in infile:
             else:
                 outfile.write("\n")
+    if not os.path.exists(temp_translated_file.name):
+        print("Subtitle file does not exist. Exiting.")
+        return
+    output_video = "output_video.mp4"
+    if not run_command(["ffmpeg", "-i", Video, "-vf", f"subtitles={temp_translated_file.name}", output_video]):
+        print("FFmpeg command for embedding subtitles failed. Exiting.")
+        return
     os.unlink(temp_transcript_file.name)
     os.unlink(temp_translated_file.name)
+    return output_video
 iface = gr.Interface(
     fn=process_video,
     inputs=[
     title="VIDEO TRANSCRIPTION AND TRANSLATION"
 )
+iface.launch()