# wav2lip_api / app.py — Gradio Wav2Lip demo (image + audio -> talking-head video)
# Hugging Face Space revision a37c88f ("Update app.py" by mich123geb, 1.9 kB)
import gradio as gr
import os
import uuid
import subprocess
import requests
from PIL import Image
# Safe imports: install missing audio dependencies at runtime (this Space
# may not ship with them pre-installed).
def _ensure_installed(package: str) -> None:
    """Install *package* with the current interpreter's pip, raising on failure."""
    import sys
    # sys.executable guarantees we install into THIS interpreter's
    # environment; check_call surfaces install failures instead of
    # silently continuing like os.system did.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

try:
    import librosa
except ImportError:
    _ensure_installed("librosa")
    import librosa

try:
    import soundfile as sf
except ImportError:
    _ensure_installed("soundfile")
    import soundfile as sf
# Download the Wav2Lip checkpoint once if it is not already cached locally.
MODEL_URL = "https://huggingface.co/spaces/justest/wav2lip-v2/resolve/main/wav2lip_gan.pth"
if not os.path.exists("wav2lip_gan.pth"):
    # timeout avoids hanging the Space startup forever; raise_for_status
    # prevents writing an HTML error page to disk as the "checkpoint".
    r = requests.get(MODEL_URL, timeout=300)
    r.raise_for_status()
    with open("wav2lip_gan.pth", "wb") as f:
        f.write(r.content)
def preprocess(image, audio_file):
    """Prepare one request's inputs for Wav2Lip inference.

    Resizes *image* (a PIL Image) to 256 px tall preserving aspect ratio and
    saves it as JPEG; resamples *audio_file* (a path) to 16 kHz mono WAV.

    Returns:
        (image_path, audio_out_path, output_path) — per-request file names;
        output_path is the video file inference is expected to create.
    """
    uid = str(uuid.uuid4())
    image_path = f"{uid}_image.jpg"
    audio_out_path = f"{uid}_audio.wav"
    output_path = f"{uid}_output.mp4"

    # Resize to 256 px height. JPEG cannot store an alpha channel, so
    # convert RGBA/palette uploads (e.g. PNGs) to RGB first — saving them
    # directly would raise. max(1, ...) guards against a zero-width image
    # for extremely tall inputs.
    new_width = max(1, int(image.width * 256 / image.height))
    image = image.convert("RGB").resize((new_width, 256), Image.Resampling.LANCZOS)
    image.save(image_path)

    # Resample audio with librosa to the 16 kHz mono that Wav2Lip expects.
    y, sr = librosa.load(audio_file, sr=16000, mono=True)
    sf.write(audio_out_path, y, 16000)
    return image_path, audio_out_path, output_path
def generate(image, audio_file):
    """Run Wav2Lip inference on an image + audio pair.

    Returns the path of the generated .mp4 video.

    Raises:
        subprocess.CalledProcessError: if inference.py exits non-zero —
        previously the failure was swallowed and a path to a file that was
        never created was returned.
    """
    image_path, audio_path, output_path = preprocess(image, audio_file)
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "wav2lip_gan.pth",
        "--face", image_path,
        "--audio", audio_path,
        "--outfile", output_path,
    ]
    # check=True surfaces inference failures to the Gradio UI as an error
    # instead of returning a missing output file.
    subprocess.run(command, check=True)
    return output_path
# Build and launch the Gradio UI. live is left at its default (False) so the
# expensive CPU inference only runs when the user clicks Submit — live=True
# would re-trigger generation on every input change.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio"),
    ],
    outputs=gr.Video(label="Generated Talking Video"),
    title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
    description="Upload an image and audio. This version uses librosa for resampling and is CPU-friendly.",
).launch()