mich123geb committed on
Commit a30d89d · verified · 1 Parent(s): 33bc7a2

Update app.py

Files changed (1)
  1. app.py +65 -45
app.py CHANGED
@@ -1,61 +1,81 @@
- import gradio as gr
  import os
- import subprocess
  import uuid
  from PIL import Image
  from pydub import AudioSegment

- # Auto-install scipy if needed
- try:
-     import scipy
- except ImportError:
-     os.system("pip install scipy")
-     import scipy

- # Download Wav2Lip model if not already downloaded
- if not os.path.exists("wav2lip_gan.pth"):
-     os.system("wget https://huggingface.co/spaces/ZALAME-HAFE/w2l-hf/resolve/main/wav2lip_gan.pth")

- def preprocess(image, audio_path):
-     uid = str(uuid.uuid4())
-     image_path = f"{uid}_image.jpg"
-     audio_out_path = f"{uid}_audio.wav"
-     output_path = f"{uid}_output.mp4"

-     # Resize image height to 256, keep aspect ratio
      image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
-     image.save(image_path)

-     # Convert audio to 16kHz mono WAV
-     audio = AudioSegment.from_file(audio_path)
-     audio = audio.set_frame_rate(16000).set_channels(1)
-     audio.export(audio_out_path, format="wav")

-     return image_path, audio_out_path, output_path

- def generate(image, audio_file):
-     image_path, audio_path, output_path = preprocess(image, audio_file)

-     command = [
-         "python", "inference.py",
-         "--checkpoint_path", "wav2lip_gan.pth",
-         "--face", image_path,
-         "--audio", audio_path,
-         "--outfile", output_path
-     ]
-     subprocess.run(command)

-     return output_path

- # Gradio interface
- gr.Interface(
      fn=generate,
-     inputs=[
-         gr.Image(type="pil", label="Upload Image"),
-         gr.Audio(type="filepath", label="Upload Audio (any format)")
-     ],
-     outputs=gr.Video(label="Generated Talking Video"),
-     title="🗣️ Wav2Lip - Light & Fast",
-     description="Upload an image and audio to generate a lip-synced video. Optimized for Hugging Face CPU spaces using Pydub.",
-     live=True
- ).launch()

  import os
  import uuid
+ import subprocess
+ from pathlib import Path
+
+ import gradio as gr
  from PIL import Image
  from pydub import AudioSegment

+ # ──────────────────────────────────────────────
+ # 1. Download model checkpoint once
+ # ──────────────────────────────────────────────
+ MODEL_PATH = Path("wav2lip_gan.pth")
+ MODEL_URL = (
+     "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
+ )  # public mirror
+
+ if not MODEL_PATH.exists():
+     os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")

+ # ──────────────────────────────────────────────
+ # 2. Helper: resize image + convert audio → 16 kHz mono WAV
+ # ──────────────────────────────────────────────
+ def preprocess(image, audio_file):
+     if image is None or audio_file is None:
+         raise ValueError("Both an image and an audio file are required.")

+     uid = uuid.uuid4().hex
+     img_path = f"{uid}.jpg"
+     wav_path = f"{uid}.wav"
+     out_path = f"{uid}_result.mp4"

+     # resize image to 256 px height (keeps aspect ratio)
      image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
+     image.save(img_path)

+     # convert audio to 16 kHz mono WAV
+     seg = AudioSegment.from_file(audio_file)
+     seg = seg.set_frame_rate(16_000).set_channels(1)
+     seg.export(wav_path, format="wav")

+     return img_path, wav_path, out_path

+ # ──────────────────────────────────────────────
+ # 3. Main inference wrapper
+ # ──────────────────────────────────────────────
+ def generate(image, audio):
+     try:
+         img, wav, out_vid = preprocess(image, audio)
+     except Exception as e:
+         return f"❌ {e}"

+     subprocess.run(
+         [
+             "python", "inference.py",
+             "--checkpoint_path", str(MODEL_PATH),
+             "--face", img,
+             "--audio", wav,
+             "--outfile", out_vid,
+         ],
+         check=True,
+     )

+     return out_vid if Path(out_vid).exists() else "❌ Generation failed."

+ # ──────────────────────────────────────────────
+ # 4. Gradio UI
+ # ──────────────────────────────────────────────
+ demo = gr.Interface(
      fn=generate,
+     inputs=[gr.Image(type="pil", label="Image"),
+             gr.Audio(type="filepath", label="Audio (any format)")],
+     outputs=gr.Video(label="Talking-head MP4"),
+     title="🗣️ Wav2Lip CPU Demo",
+     description="Upload a single face image and an audio clip to create a lip-synced video (runs on free CPU tier).",
+     allow_flagging="never",
+     live=True,
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
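
For reference, a minimal local smoke test of the new preprocess() helper might look like the sketch below. The file names face.jpg and speech.mp3 are placeholders, ffmpeg must be available for pydub to decode the audio, and importing app also triggers the one-time wav2lip_gan.pth download if the checkpoint is missing.

# Hypothetical smoke test for preprocess() from this commit's app.py.
# Assumes ffmpeg is on PATH (pydub shells out to it) and that
# face.jpg / speech.mp3 exist in the working directory.
from PIL import Image
from app import preprocess  # module import also fetches the checkpoint if absent

img = Image.open("face.jpg")
img_path, wav_path, out_path = preprocess(img, "speech.mp3")

print(img_path)   # <uuid>.jpg  - resized to a height of 256 px
print(wav_path)   # <uuid>.wav  - 16 kHz mono audio
print(out_path)   # <uuid>_result.mp4 - written later by inference.py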