"""Advanced Lip Sync Tool — mobile-friendly Gradio front-end.

Provides four tabs: microphone/file lip-sync, text-to-speech lip-sync,
live-recording lip-sync, and advanced settings.  The actual model
inference (Wav2Lip / MuseTalk / SadTalker) is a placeholder; each
handler currently echoes the input video back with a status message.
"""

import os
import subprocess  # retained for the real Wav2Lip subprocess call (unused in placeholder)
import tempfile
from pathlib import Path  # retained for the real implementation (unused in placeholder)

import cv2  # retained for the real implementation (unused in placeholder)
import gradio as gr
import numpy as np  # retained for the real implementation (unused in placeholder)
import torch  # retained for the real implementation (unused in placeholder)

# Mobile responsive CSS
mobile_css = """
/* Mobile First Design */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px !important;
        margin: 0 !important;
    }
    .tab-nav {
        flex-wrap: wrap !important;
    }
    .tab-nav button {
        min-width: 80px !important;
        font-size: 12px !important;
        padding: 8px 12px !important;
    }
    .input-container {
        margin: 10px 0 !important;
    }
    .output-video {
        max-width: 100% !important;
        height: auto !important;
    }
    .btn-primary {
        width: 100% !important;
        margin: 10px 0 !important;
        padding: 12px !important;
        font-size: 16px !important;
    }
}
@media (min-width: 769px) and (max-width: 1024px) {
    .gradio-container {
        max-width: 95% !important;
    }
}
@media (min-width: 1025px) {
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
    }
}
.header-title {
    text-align: center !important;
    margin-bottom: 20px !important;
    color: #2563eb !important;
}
.feature-card {
    border: 1px solid #e5e7eb !important;
    border-radius: 8px !important;
    padding: 15px !important;
    margin: 10px 0 !important;
    background: #f9fafb !important;
}
"""


def process_lip_sync_basic(video_file, audio_input):
    """Basic lip sync processing using Wav2Lip.

    Args:
        video_file: path of the uploaded video (or None).
        audio_input: either a file path (upload) or a
            ``(sample_rate, np.ndarray)`` tuple (microphone).

    Returns:
        (video_path_or_None, status_message) for the UI outputs.
    """
    if video_file is None or audio_input is None:
        return None, "❌ Video اور Audio دونوں required ہیں!"

    temp_audio_path = None  # set only for mic input; cleaned up in `finally`
    try:
        # Handle audio input (could be file path or tuple for mic)
        if isinstance(audio_input, tuple):
            # Microphone input: (sample_rate, audio_data)
            import soundfile as sf  # optional dependency, only needed for mic input

            sample_rate, audio_data = audio_input
            # Persist the raw samples to a temp WAV the model can consume.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                sf.write(temp_audio.name, audio_data, sample_rate)
                temp_audio_path = temp_audio.name
            audio_path = temp_audio_path
        else:
            # File upload
            audio_path = audio_input

        # Placeholder for actual Wav2Lip processing.
        # In a real implementation, the Wav2Lip model would be invoked here
        # with `video_file` and `audio_path`.
        # For now, return the original video with a success message.
        return video_file, (
            "✅ Lip sync processing completed!"
            f"\n📁 Video: {os.path.basename(video_file)}"
            "\n🎵 Audio: Processed successfully"
        )
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
    finally:
        # Fix: the temp WAV was previously leaked (delete=False, never removed).
        if temp_audio_path is not None:
            try:
                os.unlink(temp_audio_path)
            except OSError:
                pass  # best-effort cleanup; never mask the handler's result


def process_text_to_speech_sync(video_file, text_input, voice_type):
    """Text to Speech + Lip Sync.

    Args:
        video_file: path of the uploaded video (or None).
        text_input: text to synthesize (may be None/empty from the UI).
        voice_type: one of the voice dropdown choices.

    Returns:
        (video_path_or_None, status_message) for the UI outputs.
    """
    # Fix: guard against None before .strip() (cleared textbox yields None).
    if video_file is None or not text_input or not text_input.strip():
        return None, "❌ Video اور Text دونوں required ہیں!"

    try:
        # Placeholder for TTS + Lip sync processing.
        # Real implementation would:
        # 1. Convert text to speech using selected voice
        # 2. Apply lip sync to video using generated audio
        return video_file, (
            "✅ Text-to-Speech Lip Sync completed!"
            f"\n📝 Text: {text_input[:50]}..."
            f"\n🎭 Voice: {voice_type}"
        )
    except Exception as e:
        return None, f"❌ Error: {str(e)}"


def process_live_recording(video_file, live_audio):
    """Live recording lip sync (placeholder).

    Args:
        video_file: path of the uploaded video (or None).
        live_audio: streaming microphone chunk, or None while idle.

    Returns:
        (video_path_or_None, status_message) for the UI outputs.
    """
    if video_file is None:
        return None, "❌ Video file required!"
    if live_audio is None:
        return video_file, "🔴 Recording... (یہاں live audio processing ہوگی)"

    try:
        # Placeholder for real-time processing
        return video_file, "✅ Live recording processed!"
    except Exception as e:
        return None, f"❌ Error: {str(e)}"


# Main Gradio Interface
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=mobile_css,
    title="Advanced Lip Sync Tool",
    analytics_enabled=False,
) as demo:
    # Header
    gr.Markdown(
        """
        # 🎬 Advanced Lip Sync Tool
        ### Professional Mobile-Friendly Lip Synchronization

        **💡 Features:**
        - 🎤 Microphone & File Audio Input
        - 📝 Text-to-Speech Integration
        - 🔴 Live Recording Support
        - 📱 Mobile Responsive Design
        """,
        elem_classes=["header-title"],
    )

    with gr.Tabs():
        # Tab 1: Microphone + Video
        with gr.TabItem("🎤 Microphone + Video", elem_id="tab-mic"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
                    video_input1 = gr.Video(
                        label="Video File",
                        height=300,
                    )
                    gr.Markdown("### 🎵 Audio Input", elem_classes=["feature-card"])
                    audio_input1 = gr.Audio(
                        label="Audio (Microphone یا File)",
                        sources=["microphone", "upload"],
                        type="numpy",
                    )
                    process_btn1 = gr.Button(
                        "🚀 Process Lip Sync",
                        variant="primary",
                        size="lg",
                        scale=2,
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### 🎬 Result", elem_classes=["feature-card"])
                    output_video1 = gr.Video(
                        label="Processed Video",
                        height=300,
                        elem_classes=["output-video"],
                    )
                    output_message1 = gr.Textbox(
                        label="Status",
                        lines=4,
                        max_lines=6,
                    )
            process_btn1.click(
                process_lip_sync_basic,
                inputs=[video_input1, audio_input1],
                outputs=[output_video1, output_message1],
            )

        # Tab 2: Text to Speech
        with gr.TabItem("📝 Text to Speech", elem_id="tab-tts"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
                    video_input2 = gr.Video(
                        label="Video File",
                        height=250,
                    )
                    gr.Markdown("### 📝 Text Input", elem_classes=["feature-card"])
                    text_input = gr.Textbox(
                        label="Text for Speech",
                        lines=4,
                        placeholder="یہاں اپنا text لکھیں جو speech میں convert ہوگا...",
                    )
                    voice_type = gr.Dropdown(
                        label="🎭 Voice Type",
                        choices=["Male", "Female", "Child", "Robot"],
                        value="Female",
                    )
                    process_btn2 = gr.Button(
                        "🗣️ Generate Speech + Lip Sync",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### 🎬 Result", elem_classes=["feature-card"])
                    output_video2 = gr.Video(
                        label="TTS Lip Sync Result",
                        height=300,
                        elem_classes=["output-video"],
                    )
                    output_message2 = gr.Textbox(
                        label="Status",
                        lines=4,
                        max_lines=6,
                    )
            process_btn2.click(
                process_text_to_speech_sync,
                inputs=[video_input2, text_input, voice_type],
                outputs=[output_video2, output_message2],
            )

        # Tab 3: Live Recording
        with gr.TabItem("🔴 Live Recording", elem_id="tab-live"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📹 Upload Video", elem_classes=["feature-card"])
                    video_input3 = gr.Video(
                        label="Video File",
                        height=250,
                    )
                    gr.Markdown("### 🎙️ Live Recording", elem_classes=["feature-card"])
                    gr.Markdown("**Instructions:** Record button دبا کر real-time audio record کریں")
                    live_audio = gr.Audio(
                        label="Live Audio Recording",
                        sources=["microphone"],
                        streaming=True,
                        type="numpy",
                    )
                    process_btn3 = gr.Button(
                        "🔴 Process Live Sync",
                        variant="primary",
                        size="lg",
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### 🎬 Live Result", elem_classes=["feature-card"])
                    output_video3 = gr.Video(
                        label="Live Sync Result",
                        height=300,
                        elem_classes=["output-video"],
                    )
                    output_message3 = gr.Textbox(
                        label="Live Status",
                        lines=4,
                        max_lines=6,
                    )
            process_btn3.click(
                process_live_recording,
                inputs=[video_input3, live_audio],
                outputs=[output_video3, output_message3],
            )

        # Tab 4: Advanced Settings
        with gr.TabItem("⚙️ Advanced Settings", elem_id="tab-settings"):
            gr.Markdown("### 🛠️ Model Configuration", elem_classes=["feature-card"])
            model_choice = gr.Dropdown(
                label="🤖 Lip Sync Model",
                choices=["Wav2Lip (Fast)", "MuseTalk (Quality)", "SadTalker (Advanced)"],
                value="Wav2Lip (Fast)",
            )
            quality_setting = gr.Slider(
                label="📊 Output Quality",
                minimum=480,
                maximum=1080,
                value=720,
                step=240,
                info="Higher = Better quality, Slower processing",
            )
            fps_setting = gr.Slider(
                label="🎬 FPS Setting",
                minimum=15,
                maximum=60,
                value=25,
                step=5,
            )
            gr.Markdown("### 📱 Mobile Optimization", elem_classes=["feature-card"])
            mobile_mode = gr.Checkbox(
                label="📱 Mobile Optimization Mode",
                value=True,
                info="Optimize for mobile devices (faster processing)",
            )
            batch_processing = gr.Checkbox(
                label="⚡ Batch Processing",
                value=False,
                info="Process multiple files (desktop only)",
            )
            save_btn = gr.Button("💾 Save Settings", variant="secondary")
            settings_status = gr.Textbox(label="Settings Status", lines=2)
            # Placeholder: settings are not persisted yet, only acknowledged.
            save_btn.click(
                lambda *args: "✅ Settings saved successfully!",
                outputs=[settings_status],
            )

    # Footer
    gr.Markdown(
        """
        ---
        ### 📱 Mobile Instructions:
        - **iPad/Tablet:** All features fully supported
        - **Phone:** Optimized for touch interactions
        - **Performance:** Auto-adjusts based on device capabilities

        **🔧 Powered by:** Gradio + Hugging Face Spaces | **👨‍💻 Author:** MiniMax Agent
        """,
        elem_classes=["feature-card"],
    )

# Launch the app - FIXED VERSION
if __name__ == "__main__":
    demo.launch()