Spaces:

banao-tech
/

model-testing

Paused

File size: 2,224 Bytes

a2cf6b9
 
400a8bd
a2cf6b9
 
 
400a8bd
 
 
a2cf6b9
400a8bd
 
a2cf6b9
400a8bd
a2cf6b9
 
400a8bd
a2cf6b9
400a8bd
 
 
 
 
 
 
a2cf6b9
400a8bd
a2cf6b9
400a8bd
 
 
a2cf6b9
 
 
 
400a8bd
 
 
 
 
 
 
 
 
 
 
 
 
a2cf6b9
400a8bd
 
 
 
a2cf6b9
400a8bd
 
 
 
 
 
 
a2cf6b9
 
400a8bd
 
 
 
a2cf6b9
400a8bd
 
 
a2cf6b9
400a8bd
a2cf6b9
 
400a8bd

import os
import gradio as gr
import subprocess
from pathlib import Path
import shutil

# Scratch directory that holds the generated video; created at import time.
ROOT = Path("/tmp") / "musehub"
ROOT.mkdir(exist_ok=True)

def generate_lipsync(image_path, audio_path):
    """Generate a lip-synced video by running video-retalking's ``inference.py``.

    Args:
        image_path: Filesystem path of the face image; falsy when nothing
            was uploaded.
        audio_path: Filesystem path of the driving audio; falsy when nothing
            was uploaded.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is the path
        of the generated MP4 as a string, or ``None`` on failure. Ordinary
        exceptions are caught and reported through ``status_message`` rather
        than raised, so the UI always receives a displayable result.
    """
    import sys  # local import: only used to locate the running interpreter

    try:
        if not image_path or not audio_path:
            return None, "❌ Please upload both image and audio"

        # The scratch directory lives under /tmp and may have been cleaned
        # up since start-up, so make sure it is still there.
        ROOT.mkdir(parents=True, exist_ok=True)
        output = ROOT / "output.mp4"

        # Drop the result of any previous request. Otherwise a run that exits
        # with status 0 without writing a file (or a failed run) would leave
        # the old video in place and it could be served as the new result.
        if output.exists():
            output.unlink()

        # Run the inference script with the same interpreter as this app so
        # it sees the same installed packages; a bare "python" is resolved
        # through PATH and may be a different environment.
        cmd = [
            sys.executable, "inference.py",
            "--face", image_path,
            "--audio", audio_path,
            "--outfile", str(output)
        ]

        # check=True turns a non-zero exit status into CalledProcessError,
        # which is reported by the handler below.
        subprocess.run(cmd, check=True, cwd="/app/video-retalking")

        if output.exists():
            return str(output), "✅ Video generated!"
        return None, "❌ Generation failed"

    except Exception as e:
        # UI boundary: surface the failure as a status message instead of
        # letting the request crash.
        return None, f"❌ Error: {str(e)}"

def setup_model():
    """Clone video-retalking and download its checkpoints if not yet present.

    The presence of ``/app/video-retalking`` is treated as "already set up".

    Returns:
        ``True`` if the repository directory already exists or was set up
        successfully; ``False`` if cloning or the checkpoint download failed.
    """
    repo_dir = Path("/app/video-retalking")
    if repo_dir.exists():
        return True

    try:
        # Clone repo
        subprocess.run([
            "git", "clone",
            "https://github.com/OpenTalker/video-retalking.git",
            str(repo_dir)
        ], check=True)

        # Download checkpoints
        # NOTE(review): confirm that scripts/download_models.sh exists in the
        # upstream repository; if it does not, this step always fails.
        subprocess.run([
            "bash", "scripts/download_models.sh"
        ], cwd=repo_dir, check=True)
    except (subprocess.SubprocessError, OSError) as exc:
        # Remove a partial checkout. If it were left behind, the existence
        # check above would report success on the next call even though the
        # checkpoints were never downloaded.
        shutil.rmtree(repo_dir, ignore_errors=True)
        print(f"⚠️ Model setup failed: {exc}", flush=True)
        return False

    return True

# Gradio UI: inputs and the trigger button on the left, results on the right.
with gr.Blocks(title="AI Lip Sync") as demo:
    gr.Markdown(
        "# 🎤 AI Lip Sync Generator\n"
        "Upload a face image and audio to create lip-synced video"
    )

    with gr.Row():
        # Input side: both components hand file paths to the callback.
        with gr.Column():
            image = gr.Image(label="Face Image", type="filepath")
            audio = gr.Audio(label="Audio File", type="filepath")
            btn = gr.Button("🚀 Generate", variant="primary")

        # Output side: the rendered video plus a textual status line.
        with gr.Column():
            video = gr.Video(label="Result")
            status = gr.Textbox(label="Status")

    # The callback returns (video_path, status_message), matching the order
    # of the two output components.
    btn.click(
        fn=generate_lipsync,
        inputs=[image, audio],
        outputs=[video, status],
    )

if __name__ == "__main__":
    # Launch the UI even when setup fails so the app stays reachable, but
    # say so explicitly: without the model every generation request will
    # come back with an error status.
    if not setup_model():
        print(
            "⚠️ video-retalking setup failed; generation requests will return errors.",
            flush=True,
        )
    demo.launch()