Spaces:

Madras1
/

sadtalker-api

Sleeping

App Files Files Community

Madras1 committed 17 days ago

Commit

4c79f9d

verified ·

1 Parent(s): 30610b2

Upload 3 files

Browse files

Files changed (3) hide show

README.md +6 -32
app.py +68 -52
requirements.txt +4 -3

README.md CHANGED Viewed

@@ -9,41 +9,15 @@ app_file: app.py
 pinned: false
 ---
-# SadTalker API
-Talking head generation API using SadTalker in CPU mode.
 ## Features
-- Generates video from face image + audio
-- Runs on CPU (no GPU required)
-- Returns base64 encoded video
 ## Usage
-### Via UI
-Upload an image and audio file, click Generate.
-### Via API
-```python
-import requests
-import base64
-# Read files
-with open("face.png", "rb") as f:
-    image_b64 = base64.b64encode(f.read()).decode()
-with open("audio.mp3", "rb") as f:
-    audio_b64 = base64.b64encode(f.read()).decode()
-# Call API
-response = requests.post(
-    "https://your-space.hf.space/api/predict",
-    json={"data": [image_b64, audio_b64]}
-)
-video_b64 = response.json()["data"][0]
-```
-## Notes
-- First run will download ~2GB of model weights
-- Each generation takes 1-2 minutes on CPU

 pinned: false
 ---
+# SadTalker API 🎭
+Talking head generation using SadTalker with **ZeroGPU**.
 ## Features
+- ⚡ GPU-accelerated (~20-40 seconds)
+- 🎨 Face enhancement with GFPGAN
+- 📹 Returns MP4 video
 ## Usage
+Upload a face image and audio file, click Generate.

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import gradio as gr
 import subprocess
 import tempfile
 import base64
 import os
 import shutil
-# Clone SadTalker on first run
 SADTALKER_DIR = "/home/user/SadTalker"
 def setup_sadtalker():
@@ -18,15 +20,14 @@ def setup_sadtalker():
             SADTALKER_DIR
         ], check=True)
-        # Download checkpoints
-        print("Downloading checkpoints...")
-        os.makedirs(f"{SADTALKER_DIR}/checkpoints", exist_ok=True)
-        # Download from HuggingFace
         subprocess.run([
-            "pip", "install", "huggingface_hub"
         ], check=True)
         from huggingface_hub import snapshot_download
         snapshot_download(
             repo_id="vinthony/SadTalker",
@@ -36,33 +37,45 @@ def setup_sadtalker():
     return True
 def generate_video(image_path: str, audio_path: str) -> str:
     """
     Generate talking head video from image and audio
-    Returns: path to generated video
     """
     setup_sadtalker()
     with tempfile.TemporaryDirectory() as tmpdir:
         output_dir = os.path.join(tmpdir, "output")
         os.makedirs(output_dir, exist_ok=True)
-        # Run SadTalker inference
         cmd = [
-            "python", f"{SADTALKER_DIR}/inference.py",
             "--driven_audio", audio_path,
             "--source_image", image_path,
             "--result_dir", output_dir,
             "--still",  # Less movement, faster
             "--preprocess", "crop",
-            "--cpu"  # Force CPU mode
         ]
         print(f"Running: {' '.join(cmd)}")
-        result = subprocess.run(cmd, capture_output=True, text=True, cwd=SADTALKER_DIR)
         if result.returncode != 0:
-            print(f"Error: {result.stderr}")
             raise Exception(f"SadTalker failed: {result.stderr}")
         # Find generated video
@@ -70,12 +83,46 @@ def generate_video(image_path: str, audio_path: str) -> str:
             for f in files:
                 if f.endswith(".mp4"):
                     video_path = os.path.join(root, f)
-                    # Read and return as base64
                     with open(video_path, "rb") as vf:
                         return base64.b64encode(vf.read()).decode("utf-8")
         raise Exception("No video generated")
 def api_generate(image_base64: str, audio_base64: str) -> dict:
     """API endpoint for generating video"""
     try:
@@ -98,50 +145,20 @@ def api_generate(image_base64: str, audio_base64: str) -> dict:
     except Exception as e:
         return {"success": False, "error": str(e)}
-# Gradio interface for testing
-def gradio_generate(image, audio):
-    """Gradio interface wrapper"""
-    if image is None or audio is None:
-        return None
-    with tempfile.TemporaryDirectory() as tmpdir:
-        # Save uploaded files
-        image_path = os.path.join(tmpdir, "input.png")
-        audio_path = os.path.join(tmpdir, "input.mp3")
-        # Handle image (could be numpy array or path)
-        if isinstance(image, str):
-            shutil.copy(image, image_path)
-        else:
-            from PIL import Image
-            Image.fromarray(image).save(image_path)
-        # Handle audio
-        shutil.copy(audio, audio_path)
-        # Generate
-        video_base64 = generate_video(image_path, audio_path)
-        # Save to temp file for Gradio
-        output_path = os.path.join(tmpdir, "output.mp4")
-        with open(output_path, "wb") as f:
-            f.write(base64.b64decode(video_base64))
-        return output_path
-# Create Gradio app with API
-with gr.Blocks() as demo:
-    gr.Markdown("# SadTalker API 🎭")
-    gr.Markdown("Generate talking head videos from image + audio")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Face Image", type="filepath")
             audio_input = gr.Audio(label="Audio", type="filepath")
-            generate_btn = gr.Button("Generate", variant="primary")
         with gr.Column():
-            video_output = gr.Video(label="Result")
     generate_btn.click(
         fn=gradio_generate,
@@ -149,6 +166,5 @@ with gr.Blocks() as demo:
         outputs=video_output
     )
-# Launch with API enabled
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import spaces
 import subprocess
 import tempfile
 import base64
 import os
 import shutil
+import sys
+# SadTalker path
 SADTALKER_DIR = "/home/user/SadTalker"
 def setup_sadtalker():
             SADTALKER_DIR
         ], check=True)
+        # Install SadTalker requirements
         subprocess.run([
+            sys.executable, "-m", "pip", "install", "-r",
+            f"{SADTALKER_DIR}/requirements.txt"
         ], check=True)
+        # Download checkpoints from HuggingFace
+        print("Downloading checkpoints...")
         from huggingface_hub import snapshot_download
         snapshot_download(
             repo_id="vinthony/SadTalker",
     return True
+@spaces.GPU(duration=120)  # Request GPU for up to 120 seconds
 def generate_video(image_path: str, audio_path: str) -> str:
     """
     Generate talking head video from image and audio
+    Returns: base64 encoded video
     """
     setup_sadtalker()
+    # Add SadTalker to path
+    if SADTALKER_DIR not in sys.path:
+        sys.path.insert(0, SADTALKER_DIR)
     with tempfile.TemporaryDirectory() as tmpdir:
         output_dir = os.path.join(tmpdir, "output")
         os.makedirs(output_dir, exist_ok=True)
+        # Run SadTalker inference (GPU mode - no --cpu flag)
         cmd = [
+            sys.executable, f"{SADTALKER_DIR}/inference.py",
             "--driven_audio", audio_path,
             "--source_image", image_path,
             "--result_dir", output_dir,
             "--still",  # Less movement, faster
             "--preprocess", "crop",
+            "--enhancer", "gfpgan"  # Face enhancement
         ]
         print(f"Running: {' '.join(cmd)}")
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            cwd=SADTALKER_DIR,
+            env={**os.environ, "CUDA_VISIBLE_DEVICES": "0"}
+        )
         if result.returncode != 0:
+            print(f"STDOUT: {result.stdout}")
+            print(f"STDERR: {result.stderr}")
             raise Exception(f"SadTalker failed: {result.stderr}")
         # Find generated video
             for f in files:
                 if f.endswith(".mp4"):
                     video_path = os.path.join(root, f)
                     with open(video_path, "rb") as vf:
                         return base64.b64encode(vf.read()).decode("utf-8")
         raise Exception("No video generated")
+def gradio_generate(image, audio):
+    """Gradio interface wrapper"""
+    if image is None or audio is None:
+        return None
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Save uploaded files
+        image_path = os.path.join(tmpdir, "input.png")
+        audio_path = audio  # Gradio gives us filepath directly
+        # Handle image
+        if isinstance(image, str):
+            shutil.copy(image, image_path)
+        else:
+            from PIL import Image
+            Image.fromarray(image).save(image_path)
+        try:
+            # Generate video
+            video_base64 = generate_video(image_path, audio_path)
+            # Save to temp file for Gradio output
+            output_path = os.path.join(tmpdir, "output.mp4")
+            with open(output_path, "wb") as f:
+                f.write(base64.b64decode(video_base64))
+            # Copy to persistent location
+            final_path = "/tmp/sadtalker_output.mp4"
+            shutil.copy(output_path, final_path)
+            return final_path
+        except Exception as e:
+            raise gr.Error(f"Generation failed: {str(e)}")
+# API function for external calls
 def api_generate(image_base64: str, audio_base64: str) -> dict:
     """API endpoint for generating video"""
     try:
     except Exception as e:
         return {"success": False, "error": str(e)}
+# Create Gradio app
+with gr.Blocks(title="SadTalker API") as demo:
+    gr.Markdown("# 🎭 SadTalker API")
+    gr.Markdown("Generate talking head videos from image + audio (ZeroGPU)")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(label="Face Image", type="filepath")
             audio_input = gr.Audio(label="Audio", type="filepath")
+            generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
         with gr.Column():
+            video_output = gr.Video(label="Generated Video")
+            gr.Markdown("⏱️ Takes ~20-40 seconds with GPU")
     generate_btn.click(
         fn=gradio_generate,
         outputs=video_output
     )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -1,14 +1,14 @@
 # Core
 gradio==4.44.0
 huggingface_hub==0.25.0
-# PyTorch CPU
---extra-index-url https://download.pytorch.org/whl/cpu
 torch
 torchvision
 torchaudio
-# SadTalker deps
 numpy<2.0.0
 scipy
 opencv-python-headless
@@ -29,3 +29,4 @@ basicsr
 facexlib
 kornia
 safetensors

 # Core
 gradio==4.44.0
 huggingface_hub==0.25.0
+spaces
+# PyTorch CUDA (ZeroGPU will handle this)
 torch
 torchvision
 torchaudio
+# SadTalker deps
 numpy<2.0.0
 scipy
 opencv-python-headless
 facexlib
 kornia
 safetensors
+gfpgan