Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,28 +3,31 @@ import os
|
|
| 3 |
import subprocess
|
| 4 |
import uuid
|
| 5 |
from PIL import Image
|
| 6 |
-
import
|
|
|
|
| 7 |
|
| 8 |
-
# ✅ Download model if not
|
| 9 |
if not os.path.exists("wav2lip_gan.pth"):
|
| 10 |
os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
|
| 11 |
|
| 12 |
-
# ✅
|
| 13 |
def preprocess(image, audio_path):
|
| 14 |
uid = str(uuid.uuid4())
|
| 15 |
-
image_path = f"{uid}
|
| 16 |
audio_out_path = f"{uid}_audio.wav"
|
|
|
|
| 17 |
|
| 18 |
-
# Resize image to 256
|
| 19 |
image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
|
| 20 |
image.save(image_path)
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
sf.write(audio_out_path,
|
| 25 |
|
| 26 |
-
return image_path, audio_out_path,
|
| 27 |
|
|
|
|
| 28 |
def generate(image, audio_file):
|
| 29 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 30 |
|
|
@@ -39,14 +42,15 @@ def generate(image, audio_file):
|
|
| 39 |
|
| 40 |
return output_path
|
| 41 |
|
|
|
|
| 42 |
gr.Interface(
|
| 43 |
fn=generate,
|
| 44 |
inputs=[
|
| 45 |
-
gr.Image(type="pil", label="Image"),
|
| 46 |
-
gr.Audio(type="filepath", label="Audio (WAV
|
| 47 |
],
|
| 48 |
-
outputs=gr.Video(label="Talking Video"),
|
| 49 |
-
title="⚡ Wav2Lip
|
| 50 |
-
description="
|
| 51 |
live=True
|
| 52 |
).launch()
|
|
|
|
| 3 |
import subprocess
|
| 4 |
import uuid
|
| 5 |
from PIL import Image
|
| 6 |
+
import librosa
|
| 7 |
+
import soundfile as sf # placed *after* librosa to avoid conflict
|
| 8 |
|
| 9 |
+
# ✅ Download the Wav2Lip checkpoint once at startup if it is not cached yet.
# subprocess.run with an argument list avoids os.system's shell indirection,
# and check=True fails fast with a clear error if the download does not
# succeed (os.system silently ignored a wget failure, deferring the crash
# to model-load time).
if not os.path.exists("wav2lip_gan.pth"):
    subprocess.run(
        ["wget", "https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth"],
        check=True,
    )
|
| 12 |
|
| 13 |
+
# ✅ Preprocessing to resize image + resample audio
def preprocess(image, audio_path):
    """Prepare one (image, audio) pair for Wav2Lip inference.

    Args:
        image: PIL.Image to use as the talking face.
        audio_path: filesystem path of the uploaded audio clip.

    Returns:
        Tuple ``(image_path, audio_out_path, output_path)``: paths of the
        saved resized image, the 16 kHz mono WAV, and the video file the
        caller will write the result to.
    """
    uid = str(uuid.uuid4())
    image_path = f"{uid}_image.jpg"
    audio_out_path = f"{uid}_audio.wav"
    output_path = f"{uid}_output.mp4"

    # Resize image to height = 256 (maintain aspect ratio).
    # BUGFIX: Image.ANTIALIAS was removed in Pillow 10 and raised
    # AttributeError at runtime; Image.LANCZOS is the same filter under its
    # current name.
    image = image.resize((int(image.width * 256 / image.height), 256), Image.LANCZOS)
    image.save(image_path)

    # Resample audio to 16 kHz mono using librosa — presumably the rate the
    # Wav2Lip checkpoint expects; the original code already forced 16000.
    y, _sr = librosa.load(audio_path, sr=16000, mono=True)
    sf.write(audio_out_path, y, 16000)

    return image_path, audio_out_path, output_path
|
| 29 |
|
| 30 |
+
# ✅ Main generate function
|
| 31 |
def generate(image, audio_file):
|
| 32 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 33 |
|
|
|
|
| 42 |
|
| 43 |
return output_path
|
| 44 |
|
| 45 |
+
# ✅ Gradio interface
# NOTE: live=True made Gradio re-run `generate` on every input change, but a
# single run takes ~2–4 minutes on the free CPU tier (per the description
# below), so inference must only start on an explicit Submit click.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio (WAV recommended)"),
    ],
    outputs=gr.Video(label="Generated Talking Video"),
    title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
    description="Upload an image and audio (preferably WAV). Runs on free CPU tier. ~2–4 min per video.",
    live=False,
).launch()
|