mich123geb committed on
Commit
c5790ed
·
verified ·
1 Parent(s): 68c26cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -14
app.py CHANGED
@@ -3,28 +3,31 @@ import os
3
  import subprocess
4
  import uuid
5
  from PIL import Image
6
- import soundfile as sf
 
7
 
8
- # ✅ Download model if not found (public mirror)
9
  if not os.path.exists("wav2lip_gan.pth"):
10
  os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
11
 
12
- # ✅ Downscale image and audio to reduce memory and time
13
  def preprocess(image, audio_path):
14
  uid = str(uuid.uuid4())
15
- image_path = f"{uid}_face.jpg"
16
  audio_out_path = f"{uid}_audio.wav"
 
17
 
18
- # Resize image to 256 height (keep aspect ratio)
19
  image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
20
  image.save(image_path)
21
 
22
- # Downsample audio to 16kHz mono to reduce load
23
- data, samplerate = sf.read(audio_path)
24
- sf.write(audio_out_path, data, 16000) # 16kHz
25
 
26
- return image_path, audio_out_path, f"{uid}_output.mp4"
27
 
 
28
  def generate(image, audio_file):
29
  image_path, audio_path, output_path = preprocess(image, audio_file)
30
 
@@ -39,14 +42,15 @@ def generate(image, audio_file):
39
 
40
  return output_path
41
 
 
42
  gr.Interface(
43
  fn=generate,
44
  inputs=[
45
- gr.Image(type="pil", label="Image"),
46
- gr.Audio(type="filepath", label="Audio (WAV only)")
47
  ],
48
- outputs=gr.Video(label="Talking Video"),
49
- title="⚡ Wav2Lip Fast (CPU Optimized)",
50
- description="Lip-sync image & audio with lightweight preprocessing.",
51
  live=True
52
  ).launch()
 
3
  import subprocess
4
  import uuid
5
  from PIL import Image
6
+ import librosa
7
+ import soundfile as sf # placed *after* librosa to avoid conflict
8
 
9
# ✅ Download Wav2Lip model if not present
if not os.path.exists("wav2lip_gan.pth"):
    # Use subprocess.run with an argument list instead of os.system:
    # no shell involved, and check=True surfaces a failed download
    # (os.system silently ignored wget's exit code, which could leave
    # the app running without a usable checkpoint).
    subprocess.run(
        ["wget", "https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth"],
        check=True,
    )
12
 
13
# ✅ Preprocessing to resize image + resample audio
def preprocess(image, audio_path):
    """Prepare inputs for Wav2Lip: shrink the image and resample the audio.

    Args:
        image: PIL.Image.Image containing the face to animate.
        audio_path: path to the input audio file (any format librosa can read).

    Returns:
        Tuple of (image_path, audio_out_path, output_path): the saved
        resized image, the 16 kHz mono WAV, and the path where the
        output video should be written (not created here).
    """
    # Unique prefix so concurrent requests don't overwrite each other's files.
    uid = str(uuid.uuid4())
    image_path = f"{uid}_image.jpg"
    audio_out_path = f"{uid}_audio.wav"
    output_path = f"{uid}_output.mp4"

    # Resize image to height = 256 (maintain aspect ratio).
    # Image.ANTIALIAS was removed in Pillow 10 (deprecated since 9.1);
    # Image.LANCZOS is the same resampling filter under its current name.
    image = image.resize(
        (int(image.width * 256 / image.height), 256), Image.LANCZOS
    )
    image.save(image_path)

    # Resample audio to 16 kHz mono using librosa (Wav2Lip expects 16 kHz).
    y, _ = librosa.load(audio_path, sr=16000, mono=True)
    sf.write(audio_out_path, y, 16000)

    return image_path, audio_out_path, output_path
29
 
30
+ # ✅ Main generate function
31
  def generate(image, audio_file):
32
  image_path, audio_path, output_path = preprocess(image, audio_file)
33
 
 
42
 
43
  return output_path
44
 
45
# ✅ Gradio interface
# NOTE: live=True makes Gradio re-run `fn` on every input change; with a
# ~2–4 min CPU inference per run that floods the queue with redundant jobs.
# live=False gives an explicit Submit button, which is what a heavy
# generation pipeline needs.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio (WAV recommended)")
    ],
    outputs=gr.Video(label="Generated Talking Video"),
    title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
    description="Upload an image and audio (preferably WAV). Runs on free CPU tier. ~2–4 min per video.",
    live=False
).launch()