mich123geb committed on
Commit
a37c88f
·
verified ·
1 Parent(s): ff32b6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -18
app.py CHANGED
@@ -1,20 +1,31 @@
1
  import gradio as gr
2
  import os
3
- import subprocess
4
  import uuid
 
 
5
  from PIL import Image
6
- # add at the top of app.py
 
7
  try:
8
- import scipy
9
  except ImportError:
10
- os.system("pip install scipy")
11
- import scipy
12
 
13
- # ✅ Download model if not present
 
 
 
 
 
 
 
14
  if not os.path.exists("wav2lip_gan.pth"):
15
- os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
 
 
16
 
17
- def preprocess(image, audio_path):
18
  uid = str(uuid.uuid4())
19
  image_path = f"{uid}_image.jpg"
20
  audio_out_path = f"{uid}_audio.wav"
@@ -24,13 +35,9 @@ def preprocess(image, audio_path):
24
  image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
25
  image.save(image_path)
26
 
27
- # ✅ Resample audio to 16kHz mono WAV using ffmpeg
28
- ffmpeg_command = [
29
- "ffmpeg", "-i", audio_path,
30
- "-ar", "16000", "-ac", "1", # 16kHz mono
31
- "-y", audio_out_path
32
- ]
33
- subprocess.run(ffmpeg_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
34
 
35
  return image_path, audio_out_path, output_path
36
 
@@ -38,7 +45,7 @@ def generate(image, audio_file):
38
  image_path, audio_path, output_path = preprocess(image, audio_file)
39
 
40
  command = [
41
- "python", "inference.py",
42
  "--checkpoint_path", "wav2lip_gan.pth",
43
  "--face", image_path,
44
  "--audio", audio_path,
@@ -52,10 +59,10 @@ gr.Interface(
52
  fn=generate,
53
  inputs=[
54
  gr.Image(type="pil", label="Upload Image"),
55
- gr.Audio(type="filepath", label="Upload Audio (any format)")
56
  ],
57
  outputs=gr.Video(label="Generated Talking Video"),
58
  title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
59
- description="Upload an image and audio. This version uses ffmpeg for resampling. Runs on free CPU tier.",
60
  live=True
61
  ).launch()
 
import gradio as gr
import os
import sys
import uuid
import subprocess
import requests
from PIL import Image

# Safe imports: install missing audio dependencies at startup.
# NOTE(review): installing at runtime is a workaround for the Space's
# requirements.txt; prefer declaring librosa/soundfile there instead.
def _ensure(module_name, pip_name=None):
    """Import *module_name*, pip-installing *pip_name* (default: same) on failure."""
    import importlib
    try:
        return importlib.import_module(module_name)
    except ImportError:
        # Use the current interpreter's pip so the install lands in the
        # right environment; check_call raises if the install fails
        # instead of silently continuing like os.system did.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name or module_name])
        return importlib.import_module(module_name)

librosa = _ensure("librosa")
sf = _ensure("soundfile")

# ✅ Download Wav2Lip model if missing
MODEL_URL = "https://huggingface.co/spaces/justest/wav2lip-v2/resolve/main/wav2lip_gan.pth"
if not os.path.exists("wav2lip_gan.pth"):
    # Stream the (multi-hundred-MB) checkpoint to disk instead of buffering
    # it all in memory, and fail loudly on an HTTP error so a 404/HTML page
    # is never written to wav2lip_gan.pth.
    with requests.get(MODEL_URL, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open("wav2lip_gan.pth", "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
27
 
28
+ def preprocess(image, audio_file):
29
  uid = str(uuid.uuid4())
30
  image_path = f"{uid}_image.jpg"
31
  audio_out_path = f"{uid}_audio.wav"
 
35
  image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
36
  image.save(image_path)
37
 
38
+ # ✅ Resample audio using librosa (16kHz mono)
39
+ y, sr = librosa.load(audio_file, sr=16000, mono=True)
40
+ sf.write(audio_out_path, y, 16000)
 
 
 
 
41
 
42
  return image_path, audio_out_path, output_path
43
 
 
45
  image_path, audio_path, output_path = preprocess(image, audio_file)
46
 
47
  command = [
48
+ "python3", "inference.py",
49
  "--checkpoint_path", "wav2lip_gan.pth",
50
  "--face", image_path,
51
  "--audio", audio_path,
 
59
  fn=generate,
60
  inputs=[
61
  gr.Image(type="pil", label="Upload Image"),
62
+ gr.Audio(type="filepath", label="Upload Audio")
63
  ],
64
  outputs=gr.Video(label="Generated Talking Video"),
65
  title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
66
+ description="Upload an image and audio. This version uses librosa for resampling and is CPU-friendly.",
67
  live=True
68
  ).launch()