Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import spaces | |
| import subprocess | |
| import tempfile | |
| import base64 | |
| import os | |
| import shutil | |
| import sys | |
# Directory the SadTalker repository is cloned into and executed from.
# Defaults to the original fixed location; set the SADTALKER_DIR environment
# variable to relocate the checkout without editing this file.
SADTALKER_DIR = os.environ.get("SADTALKER_DIR", "/home/user/SadTalker")
def setup_sadtalker():
    """Clone SadTalker, install its requirements and download checkpoints.

    The existence of ``SADTALKER_DIR`` is the "already set up" signal. For
    that signal to stay trustworthy, a setup that fails part-way removes the
    directory again; otherwise the next call would skip setup and run against
    a checkout that has no checkpoints.

    Returns:
        True once SadTalker is available (already present or freshly set up).

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        OSError: If ``git`` or the Python executable cannot be started.
        Exception: Anything raised by the checkpoint download is re-raised
            after the partial checkout has been removed.
    """
    if os.path.exists(SADTALKER_DIR):
        return True

    try:
        print("Cloning SadTalker...")
        subprocess.run(
            [
                "git", "clone", "--depth", "1",
                "https://github.com/OpenTalker/SadTalker.git",
                SADTALKER_DIR,
            ],
            check=True,
        )

        print("Installing SadTalker requirements...")
        pip_result = subprocess.run(
            [
                sys.executable, "-m", "pip", "install", "-q", "-r",
                f"{SADTALKER_DIR}/requirements.txt",
            ]
        )
        if pip_result.returncode != 0:
            # The install stays best-effort (a failure does not abort setup,
            # as before), but the failure is no longer silent.
            print(
                "WARNING: pip install of SadTalker requirements exited with "
                f"code {pip_result.returncode}; continuing anyway"
            )

        print("Downloading checkpoints...")
        # Imported lazily so the dependency is only needed on first setup.
        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id="vinthony/SadTalker",
            local_dir=f"{SADTALKER_DIR}/checkpoints",
            local_dir_use_symlinks=False,
        )
    except BaseException:
        # Roll back so the directory-exists check above does not mistake a
        # half-finished setup for a complete one on the next call.
        shutil.rmtree(SADTALKER_DIR, ignore_errors=True)
        raise

    return True
def generate_video_gpu(image_path: str, audio_path: str, output_dir: str) -> str:
    """Run SadTalker's ``inference.py`` in a subprocess and return the video.

    Args:
        image_path: Path of the source face image.
        audio_path: Path of the driving audio file.
        output_dir: Directory passed to SadTalker as ``--result_dir`` and then
            searched recursively for the generated video.

    Returns:
        Absolute path of the first ``.mp4`` found under ``output_dir``,
        walking top-down with directories and file names in sorted order.

    Raises:
        RuntimeError: If the inference process exits with a non-zero status
            or no ``.mp4`` file is found afterwards.
    """
    # NOTE(review): this file imports `spaces`, but no `spaces.GPU` decorator
    # is visible on this function -- confirm GPU allocation works as intended.
    setup_sadtalker()

    # Kept from the original: makes the checkout importable in this process.
    # The inference run below does not depend on it, since it is a separate
    # process started with cwd=SADTALKER_DIR.
    if SADTALKER_DIR not in sys.path:
        sys.path.insert(0, SADTALKER_DIR)

    # The child runs with cwd=SADTALKER_DIR, so relative paths would resolve
    # against a different directory there than in this process. Pin them.
    image_path = os.path.abspath(image_path)
    audio_path = os.path.abspath(audio_path)
    output_dir = os.path.abspath(output_dir)

    cmd = [
        sys.executable, f"{SADTALKER_DIR}/inference.py",
        "--driven_audio", audio_path,
        "--source_image", image_path,
        "--result_dir", output_dir,
        "--still",
        "--preprocess", "crop",
    ]
    print(f"Running: {' '.join(cmd)}")

    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        cwd=SADTALKER_DIR,
    )
    print(f"STDOUT: {result.stdout}")
    if result.stderr:
        print(f"STDERR: {result.stderr}")
    if result.returncode != 0:
        raise RuntimeError(f"SadTalker failed: {result.stderr}")

    # Sort directories and files so the pick is deterministic rather than
    # dependent on the order the filesystem happens to list entries in.
    for root, dirs, files in os.walk(output_dir):
        dirs.sort()
        for name in sorted(files):
            if name.lower().endswith(".mp4"):
                return os.path.join(root, name)

    raise RuntimeError("No video generated")
def _is_missing(value):
    """Return True for None or an empty str/bytes/tuple/list.

    Avoids ``not value``, which raises for multi-element NumPy arrays.
    """
    if value is None:
        return True
    if isinstance(value, (str, bytes, tuple, list)):
        return not value
    return False


def _store_image(image, image_path):
    """Write the incoming image (path, file-like object or array) to image_path."""
    if isinstance(image, str) and os.path.exists(image):
        shutil.copy(image, image_path)
    elif hasattr(image, "name"):  # file-like object exposing its path
        shutil.copy(image.name, image_path)
    else:
        # Anything else is handed to PIL as array data.
        from PIL import Image as PILImage

        PILImage.fromarray(image).save(image_path)


def _store_audio(audio, audio_path):
    """Write the incoming audio (path, file-like object or (rate, data)) to audio_path."""
    if isinstance(audio, str) and os.path.exists(audio):
        shutil.copy(audio, audio_path)
    elif hasattr(audio, "name"):  # file-like object exposing its path
        shutil.copy(audio.name, audio_path)
    elif isinstance(audio, tuple):
        import scipy.io.wavfile as wav

        sample_rate, samples = audio
        wav.write(audio_path, sample_rate, samples)
    else:
        raise gr.Error(f"Formato de áudio não reconhecido: {type(audio)}")


def gradio_generate(image, audio):
    """Gradio callback: stage the inputs, run SadTalker, return the video path.

    Args:
        image: Face image as a file path, an object with a ``name`` path
            attribute, or array data accepted by ``PIL.Image.fromarray``.
        audio: Audio as a file path, an object with a ``name`` path
            attribute, or a ``(sample_rate, data)`` tuple.

    Returns:
        Path of a newly created ``.mp4`` in the system temp directory. Each
        call gets its own file, so concurrent requests cannot overwrite each
        other's result. This function does not delete the file.

    Raises:
        gr.Error: If an input is missing, cannot be staged, or generation
            fails. The messages are user-facing.
    """
    # Debug logging
    print("=== RECEIVED ===")
    print(f"Image type: {type(image)}, value: {image}")
    print(f"Audio type: {type(audio)}, value: {audio}")

    if _is_missing(image):
        raise gr.Error("Envie uma imagem primeiro!")
    if _is_missing(audio):
        raise gr.Error("Envie um áudio primeiro!")

    # All intermediate files live in a per-request directory that is removed
    # when the block exits; only the final video is copied out of it.
    with tempfile.TemporaryDirectory() as tmpdir:
        image_path = os.path.join(tmpdir, "input.png")
        audio_path = os.path.join(tmpdir, "input.wav")
        output_dir = os.path.join(tmpdir, "output")
        os.makedirs(output_dir, exist_ok=True)

        try:
            _store_image(image, image_path)
        except Exception as e:
            raise gr.Error(f"Erro ao processar imagem: {e}") from e

        try:
            _store_audio(audio, audio_path)
        except Exception as e:
            raise gr.Error(f"Erro ao processar áudio: {e}") from e

        print(f"Image saved: {image_path}, exists: {os.path.exists(image_path)}")
        print(f"Audio saved: {audio_path}, exists: {os.path.exists(audio_path)}")

        try:
            video_path = generate_video_gpu(image_path, audio_path, output_dir)

            # Copy the result out of tmpdir before it is deleted. A unique
            # name replaces the former fixed path, which was shared by every
            # request.
            fd, final_path = tempfile.mkstemp(
                prefix="sadtalker_output_", suffix=".mp4"
            )
            os.close(fd)
            try:
                shutil.copy(video_path, final_path)
            except Exception:
                # Do not leave an empty placeholder behind on failure.
                if os.path.exists(final_path):
                    os.remove(final_path)
                raise
            return final_path
        except Exception as e:
            raise gr.Error(f"Erro na geração: {e}") from e
# Gradio UI: one row with an input column (image, audio, trigger button) and
# an output column (resulting video plus a timing hint).
with gr.Blocks(title="SadTalker API") as demo:
    gr.Markdown("# 🎭 SadTalker API")
    gr.Markdown("Generate talking head videos from image + audio (ZeroGPU)")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Face Image", type="filepath")
            audio_input = gr.Audio(label="Audio", type="filepath")
            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg",
            )
        with gr.Column():
            video_output = gr.Video(label="Generated Video")
            gr.Markdown("⏱️ Takes ~30-60 seconds with GPU")

    # Clicking the button passes both inputs to gradio_generate and shows the
    # returned file in the video component.
    generate_btn.click(
        gradio_generate,
        [image_input, audio_input],
        video_output,
    )


if __name__ == "__main__":
    # Listen on all interfaces, port 7860, when run as a script.
    demo.launch(server_name="0.0.0.0", server_port=7860)