Madras1 committed on
Commit 1e135d7 · verified · 1 Parent(s): 0ab5505

Upload 3 files

Files changed (3)
  1. README.md +49 -13
  2. app.py +154 -0
  3. requirements.txt +15 -0
README.md CHANGED
@@ -1,13 +1,49 @@
- ---
- title: Sadtalker Api
- emoji: 🚀
- colorFrom: green
- colorTo: blue
- sdk: gradio
- sdk_version: 6.2.0
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: SadTalker API
+ emoji: 🎭
+ colorFrom: purple
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 4.44.0
+ app_file: app.py
+ pinned: false
+ ---
+
+ # SadTalker API
+
+ Talking head generation API using SadTalker in CPU mode.
+
+ ## Features
+ - Generates video from a face image + audio
+ - Runs on CPU (no GPU required)
+ - Returns a base64-encoded video
+
+ ## Usage
+
+ ### Via UI
+ Upload an image and an audio file, then click Generate.
+
+ ### Via API
+ ```python
+ import requests
+ import base64
+
+ # Read and base64-encode the input files
+ with open("face.png", "rb") as f:
+     image_b64 = base64.b64encode(f.read()).decode()
+
+ with open("audio.mp3", "rb") as f:
+     audio_b64 = base64.b64encode(f.read()).decode()
+
+ # Call the API
+ response = requests.post(
+     "https://your-space.hf.space/api/predict",
+     json={"data": [image_b64, audio_b64]}
+ )
+
+ video_b64 = response.json()["data"][0]
+ ```
+
+ ## Notes
+ - The first run downloads ~2 GB of model weights
+ - Each generation takes 1-2 minutes on CPU
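
The README example above stops once `video_b64` has been extracted. A short end-to-end variant of the same call that also writes the result to disk (same placeholder Space URL and `{"data": [...]}` payload as above; the `b64` helper and output file name are illustrative):

```python
import base64
import requests

def b64(path: str) -> str:
    """Read a local file and return its base64-encoded contents."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()

# Same request as in the README example; replace the URL with your Space.
response = requests.post(
    "https://your-space.hf.space/api/predict",
    json={"data": [b64("face.png"), b64("audio.mp3")]},
)

# Decode the returned base64 string and save it as a playable MP4.
with open("result.mp4", "wb") as f:
    f.write(base64.b64decode(response.json()["data"][0]))
```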
app.py ADDED
@@ -0,0 +1,154 @@
+ import gradio as gr
+ import subprocess
+ import tempfile
+ import base64
+ import os
+ import shutil
+
+ # Clone SadTalker on first run
+ SADTALKER_DIR = "/home/user/SadTalker"
+
+ def setup_sadtalker():
+     """Clone and set up SadTalker if not already done"""
+     if not os.path.exists(SADTALKER_DIR):
+         print("Cloning SadTalker...")
+         subprocess.run([
+             "git", "clone", "--depth", "1",
+             "https://github.com/OpenTalker/SadTalker.git",
+             SADTALKER_DIR
+         ], check=True)
+
+         # Download checkpoints
+         print("Downloading checkpoints...")
+         os.makedirs(f"{SADTALKER_DIR}/checkpoints", exist_ok=True)
+
+         # Download from HuggingFace
+         subprocess.run([
+             "pip", "install", "huggingface_hub"
+         ], check=True)
+
+         from huggingface_hub import snapshot_download
+         snapshot_download(
+             repo_id="vinthony/SadTalker",
+             local_dir=f"{SADTALKER_DIR}/checkpoints",
+             local_dir_use_symlinks=False
+         )
+
+     return True
+
+ def generate_video(image_path: str, audio_path: str) -> str:
+     """
+     Generate a talking head video from an image and audio.
+     Returns: the generated video as a base64-encoded string.
+     """
+     setup_sadtalker()
+
+     with tempfile.TemporaryDirectory() as tmpdir:
+         output_dir = os.path.join(tmpdir, "output")
+         os.makedirs(output_dir, exist_ok=True)
+
+         # Run SadTalker inference
+         cmd = [
+             "python", f"{SADTALKER_DIR}/inference.py",
+             "--driven_audio", audio_path,
+             "--source_image", image_path,
+             "--result_dir", output_dir,
+             "--still",  # Less movement, faster
+             "--preprocess", "crop",
+             "--cpu"  # Force CPU mode
+         ]
+
+         print(f"Running: {' '.join(cmd)}")
+         result = subprocess.run(cmd, capture_output=True, text=True, cwd=SADTALKER_DIR)
+
+         if result.returncode != 0:
+             print(f"Error: {result.stderr}")
+             raise Exception(f"SadTalker failed: {result.stderr}")
+
+         # Find the generated video and return it as base64
+         for root, dirs, files in os.walk(output_dir):
+             for f in files:
+                 if f.endswith(".mp4"):
+                     video_path = os.path.join(root, f)
+                     with open(video_path, "rb") as vf:
+                         return base64.b64encode(vf.read()).decode("utf-8")
+
+         raise Exception("No video generated")
+
+ def api_generate(image_base64: str, audio_base64: str) -> dict:
+     """API endpoint for generating video"""
+     try:
+         with tempfile.TemporaryDirectory() as tmpdir:
+             # Save image
+             image_path = os.path.join(tmpdir, "input.png")
+             with open(image_path, "wb") as f:
+                 f.write(base64.b64decode(image_base64))
+
+             # Save audio
+             audio_path = os.path.join(tmpdir, "input.mp3")
+             with open(audio_path, "wb") as f:
+                 f.write(base64.b64decode(audio_base64))
+
+             # Generate video
+             video_base64 = generate_video(image_path, audio_path)
+
+             return {"success": True, "video_base64": video_base64}
+
+     except Exception as e:
+         return {"success": False, "error": str(e)}
+
+ # Gradio interface for testing
+ def gradio_generate(image, audio):
+     """Gradio interface wrapper"""
+     if image is None or audio is None:
+         return None
+
+     with tempfile.TemporaryDirectory() as tmpdir:
+         # Save uploaded files
+         image_path = os.path.join(tmpdir, "input.png")
+         audio_path = os.path.join(tmpdir, "input.mp3")
+
+         # Handle image (could be numpy array or path)
+         if isinstance(image, str):
+             shutil.copy(image, image_path)
+         else:
+             from PIL import Image
+             Image.fromarray(image).save(image_path)
+
+         # Handle audio
+         shutil.copy(audio, audio_path)
+
+         # Generate
+         video_base64 = generate_video(image_path, audio_path)
+
+     # Decode to a file that outlives the temporary directory so Gradio
+     # can still read it after the directory is cleaned up
+     fd, output_path = tempfile.mkstemp(suffix=".mp4")
+     with os.fdopen(fd, "wb") as f:
+         f.write(base64.b64decode(video_base64))
+
+     return output_path
+
+ # Create Gradio app with API
+ with gr.Blocks() as demo:
+     gr.Markdown("# SadTalker API 🎭")
+     gr.Markdown("Generate talking head videos from image + audio")
+
+     with gr.Row():
+         with gr.Column():
+             image_input = gr.Image(label="Face Image", type="filepath")
+             audio_input = gr.Audio(label="Audio", type="filepath")
+             generate_btn = gr.Button("Generate", variant="primary")
+
+         with gr.Column():
+             video_output = gr.Video(label="Result")
+
+     generate_btn.click(
+         fn=gradio_generate,
+         inputs=[image_input, audio_input],
+         outputs=video_output
+     )
+
+ # Launch with API enabled
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
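
`api_generate` is defined above but never wired into the Blocks UI, so the quickest end-to-end check is to call it directly in the Space's Python environment. A minimal smoke-test sketch, assuming `face.png` and `audio.mp3` (placeholder names) sit next to `app.py`:

```python
import base64

from app import api_generate  # the module added in this commit

# Encode the local test inputs exactly as a remote caller would.
with open("face.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()
with open("audio.mp3", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode()

result = api_generate(image_b64, audio_b64)

if result["success"]:
    # Decode the returned video and write it to disk for inspection.
    with open("smoke_test.mp4", "wb") as f:
        f.write(base64.b64decode(result["video_base64"]))
    print("Wrote smoke_test.mp4")
else:
    print("Generation failed:", result["error"])
```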
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ # PyTorch CPU wheels; the extra index must be on its own line in requirements.txt
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ gradio>=4.0.0
+ torch
+ torchvision
+ torchaudio
+ numpy
+ scipy
+ opencv-python-headless
+ imageio
+ imageio-ffmpeg
+ pydub
+ gfpgan
+ face_alignment
+ dlib-bin
+ huggingface_hub
+ Pillow