Spaces:

mich123geb/wav2lip_api

Runtime error

App Files Community

mich123geb committed on Jul 12

Commit

5b78679

verified ·

1 Parent(s): cc5790a

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -12

app.py CHANGED Viewed

@@ -3,31 +3,31 @@ import os
 import subprocess
 import uuid
 from PIL import Image
-import librosa
-import soundfile as sf  # placed *after* librosa to avoid conflict
-# ✅ Download Wav2Lip model if not present
 if not os.path.exists("wav2lip_gan.pth"):
     os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
-# ✅ Preprocessing to resize image + resample audio
 def preprocess(image, audio_path):
     uid = str(uuid.uuid4())
     image_path = f"{uid}_image.jpg"
     audio_out_path = f"{uid}_audio.wav"
     output_path = f"{uid}_output.mp4"
-    # Resize image to height = 256 (maintain aspect ratio)
     image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
     image.save(image_path)
-    # Resample audio to 16kHz mono using librosa
-    y, sr = librosa.load(audio_path, sr=16000, mono=True)
-    sf.write(audio_out_path, y, 16000)
     return image_path, audio_out_path, output_path
-# ✅ Main generate function
 def generate(image, audio_file):
     image_path, audio_path, output_path = preprocess(image, audio_file)
@@ -42,15 +42,14 @@ def generate(image, audio_file):
     return output_path
-# ✅ Gradio interface
 gr.Interface(
     fn=generate,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
-        gr.Audio(type="filepath", label="Upload Audio (WAV recommended)")
     ],
     outputs=gr.Video(label="Generated Talking Video"),
     title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
-    description="Upload an image and audio (preferably WAV). Runs on free CPU tier. ~2–4 min per video.",
     live=True
 ).launch()

 import subprocess
 import uuid
 from PIL import Image
+# ✅ Download model if not present
 if not os.path.exists("wav2lip_gan.pth"):
     os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
 def preprocess(image, audio_path):
     uid = str(uuid.uuid4())
     image_path = f"{uid}_image.jpg"
     audio_out_path = f"{uid}_audio.wav"
     output_path = f"{uid}_output.mp4"
+    # ✅ Resize image
     image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
     image.save(image_path)
+    # ✅ Resample audio to 16kHz mono WAV using ffmpeg
+    ffmpeg_command = [
+        "ffmpeg", "-i", audio_path,
+        "-ar", "16000", "-ac", "1",  # 16kHz mono
+        "-y", audio_out_path
+    ]
+    subprocess.run(ffmpeg_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     return image_path, audio_out_path, output_path
 def generate(image, audio_file):
     image_path, audio_path, output_path = preprocess(image, audio_file)
     return output_path
 gr.Interface(
     fn=generate,
     inputs=[
         gr.Image(type="pil", label="Upload Image"),
+        gr.Audio(type="filepath", label="Upload Audio (any format)")
     ],
     outputs=gr.Video(label="Generated Talking Video"),
     title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
+    description="Upload an image and audio. This version uses ffmpeg for resampling. Runs on free CPU tier.",
     live=True
 ).launch()