Spaces:

banao-tech
/

model-testing

Build error

App Files Files Community

banao-tech committed 14 days ago

Commit

a2cf6b9

verified ·

1 Parent(s): edc6454

Create app.py

Browse files

Files changed (1) hide show

app.py +185 -0

app.py ADDED Viewed

	@@ -0,0 +1,185 @@

import importlib.util
import os
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import gradio as gr
# Environment setup
# Directory Gradio is told to use for its temporary files.
os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio"
# This flag is inherited by the `huggingface-cli download` child processes.
# Only turn it on when the optional `hf_transfer` package is importable:
# with the flag set and the package missing, hub downloads abort with an error
# instead of falling back to the regular downloader.
if importlib.util.find_spec("hf_transfer") is not None:
    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Filesystem layout, anchored at the directory that contains this file.
# resolve() makes every derived path absolute, so the paths stay valid when
# they are handed to subprocesses that run with a different working directory.
ROOT = Path(__file__).resolve().parent
MUSETALK_DIR = ROOT / "MuseTalk"       # checkout of the MuseTalk repository
MODELS_DIR = MUSETALK_DIR / "models"   # downloaded model files
RESULTS_DIR = MUSETALK_DIR / "results" # generated videos
def run_command(cmd, cwd=None):
    """Run an external command and report whether it succeeded.

    Args:
        cmd: Either a shell command line (``str``), which is executed through
            the shell exactly as before, or an argument sequence
            (``list``/``tuple``), which is executed directly without a shell.
            Prefer the sequence form whenever part of the command comes from
            user input, since nothing is then interpreted by a shell.
        cwd: Working directory for the child process; ``None`` inherits the
            current one.

    Returns:
        ``True`` if the command exited with status 0. ``False`` if it exited
        with a non-zero status or could not be started at all (for example
        because ``cwd`` or the executable does not exist).
    """
    try:
        result = subprocess.run(
            cmd,
            # Only strings need the shell; argv sequences are exec'd directly.
            shell=isinstance(cmd, str),
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Many tools report their failure reason on stdout, so show both
        # streams rather than stderr alone.
        if e.stdout:
            print(e.stdout)
        print(f"Error: {e.stderr}")
        return False
    except OSError as e:
        # The process could not be spawned (missing cwd, missing binary, ...).
        # Report it the same way as a failing command instead of raising.
        print(f"Error: {e}")
        return False

    print(result.stdout)
    return True
def setup_musetalk():
    """Clone MuseTalk and download its model files if they are missing.

    Each step is skipped when its target path already exists, so the function
    can be called repeatedly.

    Returns:
        ``True`` when every required step succeeded or was already done,
        ``False`` as soon as one step fails.
    """
    if not MUSETALK_DIR.exists():
        print("📦 Cloning MuseTalk...")
        # The destination is quoted so a checkout path containing spaces
        # survives the shell.
        cloned = run_command(
            f'git clone https://github.com/TMElyralab/MuseTalk.git "{MUSETALK_DIR}"'
        )
        if not cloned:
            # Stop here: the mkdir(parents=True) calls below would otherwise
            # create an empty MuseTalk/ directory, and every later call would
            # then skip the clone for good.
            return False

    # Create necessary directories
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)

    # Download models if not present
    if not (MODELS_DIR / "musetalk").exists():
        print("📥 Downloading MuseTalk models...")
        if not run_command(
            "huggingface-cli download TMElyralab/MuseTalk --local-dir models",
            cwd=MUSETALK_DIR,
        ):
            return False

    # Download Whisper model for audio processing
    if not (MODELS_DIR / "whisper").exists():
        print("📥 Downloading Whisper...")
        if not run_command(
            "huggingface-cli download openai/whisper-tiny --local-dir models/whisper",
            cwd=MUSETALK_DIR,
        ):
            return False

    print("✅ Setup complete!")
    return True
def generate_video(avatar_image, audio_file, bbox_shift=0):
    """Generate a lip-synced video by running MuseTalk inference in a subprocess.

    Args:
        avatar_image: Filesystem path of the face image (falsy when missing).
        audio_file: Filesystem path of the audio clip (falsy when missing).
        bbox_shift: Value forwarded as ``--bbox_shift``; coerced to ``int``.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is ``None``
        whenever generation did not produce a video; the message then says why.
    """
    # Validate first so a missing upload never triggers the clone/download.
    if not avatar_image or not audio_file:
        return None, "❌ Please upload both image and audio"

    staged_inputs = []
    try:
        # Setup on first run
        if not MUSETALK_DIR.exists() and not setup_musetalk():
            return None, "❌ Setup failed"

        # Prepare input files. Microseconds keep names distinct for requests
        # that start within the same second.
        started = datetime.now()
        timestamp = started.strftime("%Y%m%d_%H%M%S_%f")
        input_img = MUSETALK_DIR / f"input_img_{timestamp}.jpg"
        input_audio = MUSETALK_DIR / f"input_audio_{timestamp}.wav"
        staged_inputs += [input_img, input_audio]
        shutil.copy(avatar_image, input_img)
        shutil.copy(audio_file, input_audio)

        # Run MuseTalk inference
        print("🎬 Generating lip-synced video...")
        # int() guarantees that only a number is spliced into the shell
        # command; a non-numeric value raises and is reported below.
        # NOTE(review): the flag names are kept as they were; confirm them
        # against the argument parser of MuseTalk's scripts.inference.
        cmd = (
            "python -m scripts.inference "
            f'--avatar_id "{input_img}" '
            f'--audio_path "{input_audio}" '
            f"--bbox_shift {int(bbox_shift)} "
            f'--result_dir "{RESULTS_DIR}"'
        )
        if not run_command(cmd, cwd=MUSETALK_DIR):
            return None, "❌ Video generation failed"

        # Find generated video
        video_files = list(RESULTS_DIR.glob(f"*{timestamp}*.mp4"))
        if not video_files:
            # Fall back to the newest video, but only among files written
            # since this request started, so an older request's output is
            # never returned as this one's. One second of slack covers
            # filesystems with coarse modification times.
            cutoff = started.timestamp() - 1.0
            video_files = sorted(
                (p for p in RESULTS_DIR.glob("*.mp4") if p.stat().st_mtime >= cutoff),
                key=os.path.getmtime,
                reverse=True,
            )
        if video_files:
            return str(video_files[0]), "✅ Video generated successfully!"
        return None, "❌ Output video not found"
    except Exception as e:
        # UI boundary: turn any failure into a status message for the user.
        return None, f"❌ Error: {str(e)}"
    finally:
        # Remove the staged copies so they do not pile up in the checkout.
        for staged in staged_inputs:
            try:
                staged.unlink()
            except OSError:
                pass  # best-effort cleanup; must not mask the real result
# Gradio Interface
# Page layout: header text, a row with an input column and an output column,
# the action button, then the usage notes.
with gr.Blocks(title="MuseTalk Lip Sync", theme=gr.themes.Soft()) as demo:
    # Header shown at the top of the page.
    gr.Markdown(
        """
        # 🎤 MuseTalk - AI Lip Sync Generator
        Upload a face image and audio to create realistic lip-synced videos!
        **✨ Features:**
        - Fast generation (~30 seconds)
        - High quality lip sync
        - Works on T4 GPU
        - Supports various face angles
        """
    )

    with gr.Row():
        # First column: everything the user supplies. Image and audio are
        # passed to the callback as file paths.
        with gr.Column(scale=1):
            avatar = gr.Image(label="📷 Face Image", type="filepath", height=300)
            audio = gr.Audio(label="🎵 Audio File", type="filepath")
            bbox_shift = gr.Slider(
                minimum=-20,
                maximum=20,
                step=1,
                value=0,
                label="Face Alignment Adjustment",
                info="Adjust if face detection is off",
            )

        # Second column: the result and a read-only status line.
        with gr.Column(scale=1):
            output_video = gr.Video(label="🎬 Generated Video", height=400)
            status = gr.Textbox(
                value="Ready to generate...",
                label="Status",
                interactive=False,
            )

    generate_btn = gr.Button(
        "🚀 Generate Lip-Synced Video",
        size="lg",
        variant="primary",
    )
    # A click passes the three inputs to generate_video and routes the two
    # values it returns to the video player and the status box.
    generate_btn.click(
        generate_video,
        inputs=[avatar, audio, bbox_shift],
        outputs=[output_video, status],
    )

    # Footer with usage notes.
    gr.Markdown(
        """
        ---
        ### 📝 Tips:
        - Use clear, front-facing images for best results
        - Recommended resolution: 512x512 or higher
        - Audio should be clear with minimal background noise
        - First run downloads models (~3GB) - please wait
        ### ⚡ Performance:
        - T4 GPU: ~30-60 seconds per video
        - Supports videos up to 2 minutes
        **Powered by [MuseTalk](https://github.com/TMElyralab/MuseTalk)**
        """
    )
if __name__ == "__main__":
    # Queue at most five requests, then serve on all interfaces at port 7860.
    # queue() hands back the same Blocks object, so the two calls chain.
    demo.queue(max_size=5).launch(server_name="0.0.0.0", server_port=7860)