Spaces:

banao-tech
/

model-testing

Build error

App Files Community

banao-tech committed 14 days ago

Commit

400a8bd

verified ·

1 Parent(s): 0ac9382

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -161

app.py CHANGED Viewed

@@ -1,185 +1,77 @@
 import os
-import subprocess
 import gradio as gr
 from pathlib import Path
-from datetime import datetime
 import shutil
-# Environment setup
-os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio"
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-ROOT = Path(__file__).parent
-MUSETALK_DIR = ROOT / "MuseTalk"
-MODELS_DIR = MUSETALK_DIR / "models"
-RESULTS_DIR = MUSETALK_DIR / "results"
-def run_command(cmd, cwd=None):
-    """Run shell command safely"""
     try:
-        result = subprocess.run(
-            cmd,
-            shell=True,
-            cwd=cwd,
-            capture_output=True,
-            text=True,
-            check=True
-        )
-        print(result.stdout)
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"Error: {e.stderr}")
-        return False
-def setup_musetalk():
-    """Setup MuseTalk repository and models"""
-    if not MUSETALK_DIR.exists():
-        print("📦 Cloning MuseTalk...")
-        run_command(f"git clone https://github.com/TMElyralab/MuseTalk.git {MUSETALK_DIR}")
-    # Create necessary directories
-    MODELS_DIR.mkdir(parents=True, exist_ok=True)
-    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
-    # Download models if not present
-    if not (MODELS_DIR / "musetalk").exists():
-        print("📥 Downloading MuseTalk models...")
-        run_command(
-            "huggingface-cli download TMElyralab/MuseTalk --local-dir models",
-            cwd=MUSETALK_DIR
-        )
-    # Download Whisper model for audio processing
-    if not (MODELS_DIR / "whisper").exists():
-        print("📥 Downloading Whisper...")
-        run_command(
-            "huggingface-cli download openai/whisper-tiny --local-dir models/whisper",
-            cwd=MUSETALK_DIR
-        )
-    print("✅ Setup complete!")
-    return True
-def generate_video(avatar_image, audio_file, bbox_shift=0):
-    """Generate lip-synced video using MuseTalk"""
-    try:
-        # Setup on first run
-        if not MUSETALK_DIR.exists():
-            if not setup_musetalk():
-                return None, "❌ Setup failed"
-        if not avatar_image or not audio_file:
             return None, "❌ Please upload both image and audio"
-        # Prepare input files
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        input_img = MUSETALK_DIR / f"input_img_{timestamp}.jpg"
-        input_audio = MUSETALK_DIR / f"input_audio_{timestamp}.wav"
-        shutil.copy(avatar_image, input_img)
-        shutil.copy(audio_file, input_audio)
-        # Run MuseTalk inference
-        print("🎬 Generating lip-synced video...")
-        output_path = RESULTS_DIR / f"output_{timestamp}.mp4"
-        cmd = f"""python -m scripts.inference \
-            --avatar_id "{input_img}" \
-            --audio_path "{input_audio}" \
-            --bbox_shift {bbox_shift} \
-            --result_dir "{RESULTS_DIR}"
-        """
-        if not run_command(cmd, cwd=MUSETALK_DIR):
-            return None, "❌ Video generation failed"
-        # Find generated video
-        video_files = list(RESULTS_DIR.glob(f"*{timestamp}*.mp4"))
-        if not video_files:
-            # Try finding any recent video
-            video_files = sorted(RESULTS_DIR.glob("*.mp4"), key=os.path.getmtime, reverse=True)
-        if video_files:
-            return str(video_files[0]), "✅ Video generated successfully!"
-        else:
-            return None, "❌ Output video not found"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
-# Gradio Interface
-with gr.Blocks(theme=gr.themes.Soft(), title="MuseTalk Lip Sync") as demo:
-    gr.Markdown(
-        """
-        # 🎤 MuseTalk - AI Lip Sync Generator
-        Upload a face image and audio to create realistic lip-synced videos!
-        **✨ Features:**
-        - Fast generation (~30 seconds)
-        - High quality lip sync
-        - Works on T4 GPU
-        - Supports various face angles
-        """
-    )
     with gr.Row():
-        with gr.Column(scale=1):
-            avatar = gr.Image(
-                type="filepath",
-                label="📷 Face Image",
-                height=300
-            )
-            audio = gr.Audio(
-                type="filepath",
-                label="🎵 Audio File"
-            )
-            bbox_shift = gr.Slider(
-                -20, 20, value=0, step=1,
-                label="Face Alignment Adjustment",
-                info="Adjust if face detection is off"
-            )
-        with gr.Column(scale=1):
-            output_video = gr.Video(
-                label="🎬 Generated Video",
-                height=400
-            )
-            status = gr.Textbox(
-                label="Status",
-                interactive=False,
-                value="Ready to generate..."
-            )
-    generate_btn = gr.Button(
-        "🚀 Generate Lip-Synced Video",
-        variant="primary",
-        size="lg"
-    )
-    generate_btn.click(
-        fn=generate_video,
-        inputs=[avatar, audio, bbox_shift],
-        outputs=[output_video, status]
-    )
-    gr.Markdown(
-        """
-        ---
-        ### 📝 Tips:
-        - Use clear, front-facing images for best results
-        - Recommended resolution: 512x512 or higher
-        - Audio should be clear with minimal background noise
-        - First run downloads models (~3GB) - please wait
-        ### ⚡ Performance:
-        - T4 GPU: ~30-60 seconds per video
-        - Supports videos up to 2 minutes
-        **Powered by [MuseTalk](https://github.com/TMElyralab/MuseTalk)**
-        """
-    )
 if __name__ == "__main__":
-    demo.queue(max_size=5)
-    demo.launch(server_name="0.0.0.0", server_port=7860)

import os
import shutil
import subprocess
import uuid
from pathlib import Path

import gradio as gr
# Scratch directory that receives the generated videos.
ROOT = Path("/tmp/musehub")
# parents=True: do not fail at import time if the parent directory is missing.
# exist_ok=True: a restart with the directory already present is a no-op.
ROOT.mkdir(parents=True, exist_ok=True)
def generate_lipsync(image_path, audio_path):
    """Generate a lip-synced video by running video-retalking's ``inference.py``.

    Args:
        image_path: Path to the face image (the Gradio input is declared
            with ``type="filepath"``).
        audio_path: Path to the driving audio file.

    Returns:
        A ``(video_path, status)`` tuple. ``video_path`` is the generated
        file's path as a string, or ``None`` when nothing was produced;
        ``status`` is a human-readable message for the status box.
    """
    # Guard clause: nothing below can work without both inputs.
    if not image_path or not audio_path:
        return None, "❌ Please upload both image and audio"

    try:
        # The scratch directory may have been removed since import time
        # (it lives under /tmp), so re-create it before writing into it.
        ROOT.mkdir(parents=True, exist_ok=True)

        # Use a fresh file name per request. With a fixed "output.mp4" a
        # leftover file from an earlier run passes the exists() check below
        # even if this run wrote nothing, and overlapping runs would
        # overwrite each other's result.
        output = ROOT / f"output_{uuid.uuid4().hex}.mp4"

        cmd = [
            "python", "inference.py",
            "--face", str(image_path),
            "--audio", str(audio_path),
            "--outfile", str(output),
        ]
        # check=True turns a non-zero exit status into CalledProcessError,
        # which is reported through the handler below.
        subprocess.run(cmd, check=True, cwd="/app/video-retalking")
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting the exception escape the Gradio callback.
        return None, f"❌ Error: {str(e)}"

    if output.exists():
        return str(output), "✅ Video generated!"
    return None, "❌ Generation failed"
def setup_model(repo_dir="/app/video-retalking"):
    """Clone video-retalking and download its checkpoints if not yet present.

    Args:
        repo_dir: Directory that holds (or will hold) the checkout. The
            default is the directory ``generate_lipsync`` runs inference in.

    Returns:
        ``True`` if ``repo_dir`` already exists or setup completed,
        ``False`` if cloning or the checkpoint download failed.
    """
    repo_dir = Path(repo_dir)
    if repo_dir.exists():
        return True

    try:
        # Clone repo
        subprocess.run(
            [
                "git", "clone",
                "https://github.com/OpenTalker/video-retalking.git",
                str(repo_dir),
            ],
            check=True,
        )
        # Download checkpoints
        # NOTE(review): confirm scripts/download_models.sh exists upstream.
        subprocess.run(
            ["bash", "scripts/download_models.sh"],
            cwd=repo_dir,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as exc:
        # Catch only process/OS failures (a bare except would also swallow
        # KeyboardInterrupt/SystemExit) and say what went wrong.
        print(f"❌ video-retalking setup failed: {exc}")
        # Remove a partial checkout; otherwise the exists() shortcut above
        # would report success on the next call with no checkpoints present.
        shutil.rmtree(repo_dir, ignore_errors=True)
        return False
    return True
# Gradio UI: one row with two columns. The first column holds the inputs and
# the trigger button, the second one the resulting video and a status line.
with gr.Blocks(title="AI Lip Sync") as demo:
    gr.Markdown(
        "# 🎤 AI Lip Sync Generator\n"
        "Upload a face image and audio to create lip-synced video"
    )

    with gr.Row():
        with gr.Column():
            # type="filepath" makes both inputs arrive in the callback as paths.
            image = gr.Image(label="Face Image", type="filepath")
            audio = gr.Audio(label="Audio File", type="filepath")
            btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column():
            video = gr.Video(label="Result")
            status = gr.Textbox(label="Status")

    # generate_lipsync returns (video_path, status), matching the two outputs.
    btn.click(
        fn=generate_lipsync,
        inputs=[image, audio],
        outputs=[video, status],
    )
 if __name__ == "__main__":
+    setup_model()
+    demo.launch()