Spaces:

Easyworkstation
/

caspr

Paused

App Files Community

artificialguybr committed on Oct 11, 2023

Commit

fabf10e

1 Parent(s): fdbd781

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -16

app.py CHANGED Viewed

@@ -33,11 +33,16 @@ def process_video(Video, target_language):
     audio_file = f"{uuid.uuid4()}.wav"
     run(["ffmpeg", "-i", Video, audio_file])
     print("Iniciando transcrição com Whisper")
     segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
     segments = list(segments)
     transcript_file = f"{uuid.uuid4()}.srt"
-    with open(transcript_file, "w", encoding="utf-8") as f:
         counter = 1
         for segment in segments:
             start_minutes = int(segment.start // 60)
@@ -52,33 +57,43 @@ def process_video(Video, target_language):
             f.write(f"{formatted_start} --> {formatted_end}\n")
             f.write(f"{segment.text}\n\n")
             counter += 1
-    flores_code = lang_codes.get(target_language, "eng_Latn")
-    translated_file = f"{uuid.uuid4()}.srt"
-    with open(transcript_file, "r", encoding="utf-8") as infile, open(translated_file, "w", encoding="utf-8") as outfile:
-        for line in infile:
             if line.strip().isnumeric() or "-->" in line:
-                outfile.write(line)
             elif line.strip() != "":
                 inputs = tokenizer(line.strip(), return_tensors="pt")
                 translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[flores_code], max_length=100)
                 translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
-                outfile.write(translated_text + "\n")
             else:
-                outfile.write("\n")
     output_video = "output_video.mp4"
     # Debugging: Validate FFmpeg command for subtitle embedding
     print("Validating FFmpeg command for subtitle embedding...")
-    print(f"Translated SRT file: {translated_file}")
-    with open(translated_file, 'r', encoding='utf-8') as f:
         print(f"First few lines of translated SRT: {f.readlines()[:10]}")
-    if os.path.exists(translated_file):
-        print(f"{translated_file} exists.")
     else:
-        print(f"{translated_file} does not exist.")
     try:
-        translated_file_abs_path = os.path.abspath(translated_file)
-        result = subprocess.run(["ffmpeg", "-i", Video, "-vf", f"subtitles={translated_file_abs_path}", output_video], capture_output=True, text=True)
         if result.returncode == 0:
             print("FFmpeg executed successfully.")
         else:
@@ -104,4 +119,4 @@ iface = gr.Interface(
     title="VIDEO TRANSCRIPTION AND TRANSLATION"
 )
-iface.launch()

     audio_file = f"{uuid.uuid4()}.wav"
     run(["ffmpeg", "-i", Video, audio_file])
+    # Transcription with Whisper.
     print("Iniciando transcrição com Whisper")
     segments, _ = whisper_model.transcribe(audio_file, beam_size=5)
     segments = list(segments)
     transcript_file = f"{uuid.uuid4()}.srt"
+    # Create a list to hold the translated lines.
+    translated_lines = []
+    with open(transcript_file, "w+", encoding="utf-8") as f:
         counter = 1
         for segment in segments:
             start_minutes = int(segment.start // 60)
             f.write(f"{formatted_start} --> {formatted_end}\n")
             f.write(f"{segment.text}\n\n")
             counter += 1
+        # Move the file pointer to the beginning of the file.
+        f.seek(0)
+        # Translating the SRT from Whisper with NLLB.
+        flores_code = lang_codes.get(target_language, "eng_Latn")
+        for line in f:
             if line.strip().isnumeric() or "-->" in line:
+                translated_lines.append(line)
             elif line.strip() != "":
                 inputs = tokenizer(line.strip(), return_tensors="pt")
                 translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[flores_code], max_length=100)
                 translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+                translated_lines.append(translated_text + "\n")
             else:
+                translated_lines.append("\n")
+        # Move the file pointer to the beginning of the file and truncate it.
+        f.seek(0)
+        f.truncate()
+        # Write the translated lines back into the original file.
+        f.writelines(translated_lines)
     output_video = "output_video.mp4"
     # Debugging: Validate FFmpeg command for subtitle embedding
     print("Validating FFmpeg command for subtitle embedding...")
+    print(f"Translated SRT file: {transcript_file}")
+    with open(transcript_file, 'r', encoding='utf-8') as f:
         print(f"First few lines of translated SRT: {f.readlines()[:10]}")
+    if os.path.exists(transcript_file):
+        print(f"{transcript_file} exists.")
     else:
+        print(f"{transcript_file} does not exist.")
     try:
+        transcript_file_abs_path = os.path.abspath(transcript_file)
+        result = subprocess.run(["ffmpeg", "-i", Video, "-vf", f"subtitles={transcript_file_abs_path}", output_video], capture_output=True, text=True)
         if result.returncode == 0:
             print("FFmpeg executed successfully.")
         else:
     title="VIDEO TRANSCRIPTION AND TRANSLATION"
 )
+iface.launch()