Spaces:

Madras1
/

sadtalker-api

Running on Zero

App Files Files Community

Madras1 committed on Dec 25, 2025

Commit

f4d455b

verified ·

1 Parent(s): 4c79f9d

Upload 3 files

Browse files

Files changed (1) hide show

app.py +68 -90

app.py CHANGED Viewed

@@ -21,10 +21,11 @@ def setup_sadtalker():
         ], check=True)
         # Install SadTalker requirements
         subprocess.run([
-            sys.executable, "-m", "pip", "install", "-r",
             f"{SADTALKER_DIR}/requirements.txt"
-        ], check=True)
         # Download checkpoints from HuggingFace
         print("Downloading checkpoints...")
@@ -37,113 +38,90 @@ def setup_sadtalker():
     return True
-@spaces.GPU(duration=120)  # Request GPU for up to 120 seconds
-def generate_video(image_path: str, audio_path: str) -> str:
-    """
-    Generate talking head video from image and audio
-    Returns: base64 encoded video
-    """
     setup_sadtalker()
     # Add SadTalker to path
     if SADTALKER_DIR not in sys.path:
         sys.path.insert(0, SADTALKER_DIR)
-    with tempfile.TemporaryDirectory() as tmpdir:
-        output_dir = os.path.join(tmpdir, "output")
-        os.makedirs(output_dir, exist_ok=True)
-        # Run SadTalker inference (GPU mode - no --cpu flag)
-        cmd = [
-            sys.executable, f"{SADTALKER_DIR}/inference.py",
-            "--driven_audio", audio_path,
-            "--source_image", image_path,
-            "--result_dir", output_dir,
-            "--still",  # Less movement, faster
-            "--preprocess", "crop",
-            "--enhancer", "gfpgan"  # Face enhancement
-        ]
-        print(f"Running: {' '.join(cmd)}")
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            cwd=SADTALKER_DIR,
-            env={**os.environ, "CUDA_VISIBLE_DEVICES": "0"}
-        )
-        if result.returncode != 0:
-            print(f"STDOUT: {result.stdout}")
-            print(f"STDERR: {result.stderr}")
-            raise Exception(f"SadTalker failed: {result.stderr}")
-        # Find generated video
-        for root, dirs, files in os.walk(output_dir):
-            for f in files:
-                if f.endswith(".mp4"):
-                    video_path = os.path.join(root, f)
-                    with open(video_path, "rb") as vf:
-                        return base64.b64encode(vf.read()).decode("utf-8")
-        raise Exception("No video generated")
 def gradio_generate(image, audio):
     """Gradio interface wrapper"""
     if image is None or audio is None:
-        return None
     with tempfile.TemporaryDirectory() as tmpdir:
         # Save uploaded files
         image_path = os.path.join(tmpdir, "input.png")
-        audio_path = audio  # Gradio gives us filepath directly
-        # Handle image
         if isinstance(image, str):
             shutil.copy(image, image_path)
         else:
             from PIL import Image
-            Image.fromarray(image).save(image_path)
-        try:
-            # Generate video
-            video_base64 = generate_video(image_path, audio_path)
-            # Save to temp file for Gradio output
-            output_path = os.path.join(tmpdir, "output.mp4")
-            with open(output_path, "wb") as f:
-                f.write(base64.b64decode(video_base64))
-            # Copy to persistent location
-            final_path = "/tmp/sadtalker_output.mp4"
-            shutil.copy(output_path, final_path)
-            return final_path
-        except Exception as e:
-            raise gr.Error(f"Generation failed: {str(e)}")
-# API function for external calls
-def api_generate(image_base64: str, audio_base64: str) -> dict:
-    """API endpoint for generating video"""
-    try:
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Save image
-            image_path = os.path.join(tmpdir, "input.png")
-            with open(image_path, "wb") as f:
-                f.write(base64.b64decode(image_base64))
-            # Save audio
-            audio_path = os.path.join(tmpdir, "input.mp3")
-            with open(audio_path, "wb") as f:
-                f.write(base64.b64decode(audio_base64))
-            # Generate video
-            video_base64 = generate_video(image_path, audio_path)
-            return {"success": True, "video_base64": video_base64}
-    except Exception as e:
-        return {"success": False, "error": str(e)}
 # Create Gradio app
 with gr.Blocks(title="SadTalker API") as demo:
@@ -158,7 +136,7 @@ with gr.Blocks(title="SadTalker API") as demo:
         with gr.Column():
             video_output = gr.Video(label="Generated Video")
-            gr.Markdown("⏱️ Takes ~20-40 seconds with GPU")
     generate_btn.click(
         fn=gradio_generate,

         ], check=True)
         # Install SadTalker requirements
+        print("Installing SadTalker requirements...")
         subprocess.run([
+            sys.executable, "-m", "pip", "install", "-q", "-r",
             f"{SADTALKER_DIR}/requirements.txt"
+        ])
         # Download checkpoints from HuggingFace
         print("Downloading checkpoints...")
     return True
+@spaces.GPU(duration=120)
+def generate_video_gpu(image_path: str, audio_path: str, output_dir: str) -> str:
+    """GPU-accelerated video generation"""
     setup_sadtalker()
     # Add SadTalker to path
     if SADTALKER_DIR not in sys.path:
         sys.path.insert(0, SADTALKER_DIR)
+    # Run SadTalker inference
+    cmd = [
+        sys.executable, f"{SADTALKER_DIR}/inference.py",
+        "--driven_audio", audio_path,
+        "--source_image", image_path,
+        "--result_dir", output_dir,
+        "--still",
+        "--preprocess", "crop",
+    ]
+    print(f"Running: {' '.join(cmd)}")
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        cwd=SADTALKER_DIR
+    )
+    print(f"STDOUT: {result.stdout}")
+    if result.stderr:
+        print(f"STDERR: {result.stderr}")
+    if result.returncode != 0:
+        raise Exception(f"SadTalker failed: {result.stderr}")
+    # Find generated video
+    for root, dirs, files in os.walk(output_dir):
+        for f in files:
+            if f.endswith(".mp4"):
+                return os.path.join(root, f)
+    raise Exception("No video generated")
 def gradio_generate(image, audio):
     """Gradio interface wrapper"""
     if image is None or audio is None:
+        raise gr.Error("Por favor, envie uma imagem e um áudio")
     with tempfile.TemporaryDirectory() as tmpdir:
         # Save uploaded files
         image_path = os.path.join(tmpdir, "input.png")
+        audio_path = os.path.join(tmpdir, "input.wav")
+        output_dir = os.path.join(tmpdir, "output")
+        os.makedirs(output_dir, exist_ok=True)
+        # Handle image - Gradio gives filepath
         if isinstance(image, str):
             shutil.copy(image, image_path)
         else:
             from PIL import Image
+            if hasattr(image, 'save'):
+                image.save(image_path)
+            else:
+                Image.fromarray(image).save(image_path)
+        # Handle audio - Gradio gives filepath
+        if isinstance(audio, str):
+            shutil.copy(audio, audio_path)
+        elif isinstance(audio, tuple):
+            # (sample_rate, audio_data) format
+            import scipy.io.wavfile as wav
+            sr, data = audio
+            wav.write(audio_path, sr, data)
+        print(f"Image: {image_path}, exists: {os.path.exists(image_path)}")
+        print(f"Audio: {audio_path}, exists: {os.path.exists(audio_path)}")
+        # Generate video with GPU
+        video_path = generate_video_gpu(image_path, audio_path, output_dir)
+        # Copy to persistent location for Gradio
+        final_path = "/tmp/sadtalker_output.mp4"
+        shutil.copy(video_path, final_path)
+        return final_path
 # Create Gradio app
 with gr.Blocks(title="SadTalker API") as demo:
         with gr.Column():
             video_output = gr.Video(label="Generated Video")
+            gr.Markdown("⏱️ Takes ~30-60 seconds with GPU")
     generate_btn.click(
         fn=gradio_generate,