Spaces:

mohitrai76
/

DeepDub

Running

App Files Files Community

mohitrai76 committed on Jun 3, 2025

Commit

f9b4f82

verified ·

1 Parent(s): 031aac1

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -101

app.py CHANGED Viewed

@@ -54,7 +54,7 @@ def separate_vocals(input_path):
 class AudioProcessor:
     def __init__(self, device="cpu"):
         self.whisper_model = WhisperModel("small", device=device)
-        self.openrouter_api_key = "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"
         self.client = OpenAI(
             base_url="https://openrouter.ai/api/v1",
             api_key=self.openrouter_api_key,
@@ -122,7 +122,7 @@ class AudioProcessor:
                 messages=[
                     {
                         "role": "system",
-                        "content": f"You are a professional translator from English to {target_language}. Translate exactly as requested."
                     },
                     {
                         "role": "user",
@@ -260,17 +260,13 @@ async def process_audio_chunks(input_audio_path, voice, target_language):
     final_mix.export(output_path, format="wav")
     print(f"✅ Output saved as: {output_path}")
-    final_audio_path = output_path
-    final_background_path = background_path
     cleanup_files(chunk_files)
     shutil.rmtree(temp_dir, ignore_errors=True)
-    return final_audio_path, final_background_path
 # --- Gradio Interface ---
 def gradio_interface(video_file, voice, target_language):
     try:
-        # Create temporary directory for processing
         temp_dir = Path(tempfile.mkdtemp())
         input_video_path = temp_dir / "input_video.mp4"
@@ -278,26 +274,21 @@ def gradio_interface(video_file, voice, target_language):
         if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
             raise ValueError("Invalid file type. Please upload a video file.")
-        # Save the uploaded file to the temporary directory
         shutil.copyfile(video_file.name, input_video_path)
-        # Extract audio from video
         audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
         if not audio_path:
             return None
-        # Process audio chunks
         audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
         if audio_output_path is None or background_path is None:
             return None
-        # Combine with original video
         output_video_path = temp_dir / "translated_video.mp4"
         success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
         if success:
-            # Return the path to the output video
             return str(output_video_path)
         else:
             return None
@@ -305,14 +296,8 @@ def gradio_interface(video_file, voice, target_language):
     except Exception as e:
         print(f"Error processing video: {e}")
         return None
-    finally:
-        # Cleanup temporary files
-        # Commented out for debugging purposes
-        # shutil.rmtree(temp_dir, ignore_errors=True)
-        pass
 def extract_audio_from_video(video_path):
-    """Extract audio from video file using ffmpeg"""
     temp_dir = tempfile.mkdtemp()
     audio_path = os.path.join(temp_dir, "extracted_audio.wav")
@@ -333,97 +318,30 @@ def extract_audio_from_video(video_path):
         return None, None
 def combine_video_audio(video_path, audio_path, output_path):
-    """Combine original video with new audio track"""
     try:
         subprocess.run([
-            "ffmpeg", "-y", "-i", video_path,
             "-i", audio_path,
-            "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0",
-            "-shortest", output_path
         ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         return True
     except Exception as e:
-        print(f"Video combining error: {e}")
         return False
-# Voice options for each language
-voice_options = {
-    "Hindi": [
-        "hi-IN-MadhurNeural",   # Male
-        "hi-IN-SwaraNeural"     # Female
-    ],
-    "English": [
-        "en-US-GuyNeural",      # Male
-        "en-US-BenjaminRUS",    # Male
-        "en-US-ChristopherNeural",  # Male
-        "en-US-AriaNeural",     # Female
-        "en-US-JessaNeural",    # Female
-        "en-US-JennyNeural"     # Female
-    ],
-    "Spanish": [
-        "es-ES-AlvaroNeural",   # Male
-        "es-MX-JorgeNeural",    # Male
-        "es-US-AlonsoNeural",   # Male
-        "es-ES-ElviraNeural",   # Female
-        "es-MX-DaliaNeural",    # Female
-        "es-US-PalomaNeural"    # Female
-    ],
-    "French": [
-        "fr-FR-HenriNeural",    # Male
-        "fr-FR-RemyMultilingualNeural", # Male
-        "fr-CA-AntoineNeural",  # Male
-        "fr-FR-DeniseNeural",   # Female
-        "fr-FR-JulieNeural",    # Female
-        "fr-FR-VivienneMultilingualNeural"  # Female
-    ],
-    "Japanese": [
-        "ja-JP-KeitaNeural",    # Male
-        "ja-JP-DaichiNeural",   # Male
-        "ja-JP-RikuNeural",     # Male
-        "ja-JP-AoiNeural",      # Female
-        "ja-JP-NanamiNeural",   # Female
-        "ja-JP-ShioriNeural"    # Female
-    ],
-    "Korean": [
-        "ko-KR-InJoonNeural",   # Male
-         "ko-KR-SunHiNeural" # Female
-    ]
-}
-# Create Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# DeepDub : Video Dubbing Application")
-    gr.Markdown("Upload a video and get a dubbed version with translated audio")
-    with gr.Row():
-        video_input = gr.File(file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm", ".ogg"], label="Upload Video")
-        language_dropdown = gr.Dropdown(
-            list(voice_options.keys()),
-            label="Translate to",
-            value="Hindi"
-        )
-        voice_dropdown = gr.Dropdown(
-            voice_options["Hindi"],
-            label="Select Voice",
-            value="hi-IN-MadhurNeural"
-        )
-    output_video = gr.Video(label="Dubbed Video")
-    submit_btn = gr.Button("Start Dubbing")
-    def update_voice_options(language):
-        return gr.update(choices=voice_options[language], value=voice_options[language][0])
-    language_dropdown.change(update_voice_options, inputs=[language_dropdown], outputs=[voice_dropdown])
-    submit_btn.click(
-        gradio_interface,
-        inputs=[video_input, voice_dropdown, language_dropdown],
-        outputs=output_video
-    )
-demo.queue().launch(server_name="0.0.0.0", debug=True)

 class AudioProcessor:
     def __init__(self, device="cpu"):
         self.whisper_model = WhisperModel("small", device=device)
+        self.openrouter_api_key = "your_openrouter_api_key_here"
         self.client = OpenAI(
             base_url="https://openrouter.ai/api/v1",
             api_key=self.openrouter_api_key,
                 messages=[
                     {
                         "role": "system",
+                        "content": f"You are a professional translator from Given language to {target_language}. Translate exactly as requested."
                     },
                     {
                         "role": "user",
     final_mix.export(output_path, format="wav")
     print(f"✅ Output saved as: {output_path}")
     cleanup_files(chunk_files)
     shutil.rmtree(temp_dir, ignore_errors=True)
+    return output_path, background_path
 # --- Gradio Interface ---
 def gradio_interface(video_file, voice, target_language):
     try:
         temp_dir = Path(tempfile.mkdtemp())
         input_video_path = temp_dir / "input_video.mp4"
         if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
             raise ValueError("Invalid file type. Please upload a video file.")
         shutil.copyfile(video_file.name, input_video_path)
         audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
         if not audio_path:
             return None
         audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
         if audio_output_path is None or background_path is None:
             return None
         output_video_path = temp_dir / "translated_video.mp4"
         success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
         if success:
             return str(output_video_path)
         else:
             return None
     except Exception as e:
         print(f"Error processing video: {e}")
         return None
 def extract_audio_from_video(video_path):
     temp_dir = tempfile.mkdtemp()
     audio_path = os.path.join(temp_dir, "extracted_audio.wav")
         return None, None
 def combine_video_audio(video_path, audio_path, output_path):
     try:
         subprocess.run([
+            "ffmpeg", "-y",
+            "-i", video_path,
             "-i", audio_path,
+            "-c:v", "copy",
+            "-c:a", "aac",
+            "-strict", "experimental",
+            output_path
         ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         return True
     except Exception as e:
+        print(f"Video/audio combine error: {e}")
         return False
+# --- Gradio UI Setup ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Video Dubbing & Translation App")
+    video_input = gr.File(label="Upload Video", file_types=['.mp4', '.mov', '.avi', '.mkv'])
+    voice_selector = gr.Dropdown(choices=["en-US-JennyNeural", "en-GB-RyanNeural", "hi-IN-SwaraNeural"], label="Select Voice", value="en-US-JennyNeural")
+    target_lang = gr.Textbox(label="Target Language (e.g. Hindi, French, Spanish)", value="Hindi")
+    translate_btn = gr.Button("Translate & Dub")
+    output_video = gr.Video(label="Output Video")
+    translate_btn.click(fn=gradio_interface, inputs=[video_input, voice_selector, target_lang], outputs=output_video)
+demo.launch()