Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,31 +3,31 @@ import os
|
|
| 3 |
import subprocess
|
| 4 |
import uuid
|
| 5 |
from PIL import Image
|
| 6 |
-
import librosa
|
| 7 |
-
import soundfile as sf # placed *after* librosa to avoid conflict
|
| 8 |
|
| 9 |
-
# ✅ Download
|
| 10 |
if not os.path.exists("wav2lip_gan.pth"):
|
| 11 |
os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
|
| 12 |
|
| 13 |
-
# ✅ Preprocessing to resize image + resample audio
|
| 14 |
def preprocess(image, audio_path):
|
| 15 |
uid = str(uuid.uuid4())
|
| 16 |
image_path = f"{uid}_image.jpg"
|
| 17 |
audio_out_path = f"{uid}_audio.wav"
|
| 18 |
output_path = f"{uid}_output.mp4"
|
| 19 |
|
| 20 |
-
# Resize image
|
| 21 |
image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
|
| 22 |
image.save(image_path)
|
| 23 |
|
| 24 |
-
# Resample audio to 16kHz mono using
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
return image_path, audio_out_path, output_path
|
| 29 |
|
| 30 |
-
# ✅ Main generate function
|
| 31 |
def generate(image, audio_file):
|
| 32 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 33 |
|
|
@@ -42,15 +42,14 @@ def generate(image, audio_file):
|
|
| 42 |
|
| 43 |
return output_path
|
| 44 |
|
| 45 |
-
# ✅ Gradio interface
|
| 46 |
gr.Interface(
|
| 47 |
fn=generate,
|
| 48 |
inputs=[
|
| 49 |
gr.Image(type="pil", label="Upload Image"),
|
| 50 |
-
gr.Audio(type="filepath", label="Upload Audio (
|
| 51 |
],
|
| 52 |
outputs=gr.Video(label="Generated Talking Video"),
|
| 53 |
title="β‘ Wav2Lip (Optimized for Hugging Face CPU)",
|
| 54 |
-
description="Upload an image and audio
|
| 55 |
live=True
|
| 56 |
).launch()
|
|
|
|
| 3 |
import subprocess
|
| 4 |
import uuid
|
| 5 |
from PIL import Image
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# ✅ Download model if not present
|
| 8 |
# Fetch the Wav2Lip GAN checkpoint on first start-up only.
if not os.path.exists("wav2lip_gan.pth"):
    # Download under a temporary name and rename only on success, so an
    # interrupted transfer never leaves a truncated file that would pass the
    # existence check above on the next start. check=True makes a failed
    # download stop start-up instead of being silently ignored.
    subprocess.run(
        [
            "wget",
            "-O",
            "wav2lip_gan.pth.part",
            "https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth",
        ],
        check=True,
    )
    os.replace("wav2lip_gan.pth.part", "wav2lip_gan.pth")
|
| 10 |
|
|
|
|
| 11 |
def preprocess(image, audio_path):
    """Write a resized copy of *image* and a 16 kHz mono WAV copy of the audio.

    Args:
        image: PIL image; it is scaled to a height of 256 px with the width
            scaled by the same factor.
        audio_path: Path of the input audio file, passed to ffmpeg as input.

    Returns:
        Tuple ``(image_path, audio_out_path, output_path)`` of relative file
        names sharing one random UUID prefix. The image and audio files are
        written by this function; ``output_path`` is only a name, nothing is
        created there.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            the exception's ``stderr`` holds ffmpeg's error output.
    """
    uid = str(uuid.uuid4())
    image_path = f"{uid}_image.jpg"
    audio_out_path = f"{uid}_audio.wav"
    output_path = f"{uid}_output.mp4"

    # ✅ Resize image
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
    # and is available in both older and current Pillow releases.
    # max(1, ...) keeps the width valid for extremely tall, narrow images where
    # the scaled width would otherwise truncate to 0.
    new_width = max(1, int(image.width * 256 / image.height))
    image = image.resize((new_width, 256), Image.LANCZOS)
    image.save(image_path)

    # ✅ Resample audio to 16kHz mono WAV using ffmpeg
    ffmpeg_command = [
        "ffmpeg", "-i", audio_path,
        "-ar", "16000", "-ac", "1",  # 16kHz mono
        "-y", audio_out_path,  # -y: overwrite the output without prompting
    ]
    # check=True surfaces a failed conversion here instead of leaving the caller
    # with a path to a file that was never written; stderr is captured so the
    # raised exception carries ffmpeg's diagnostics.
    subprocess.run(
        ffmpeg_command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        check=True,
    )

    return image_path, audio_out_path, output_path
|
| 30 |
|
|
|
|
| 31 |
def generate(image, audio_file):
|
| 32 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 33 |
|
|
|
|
| 42 |
|
| 43 |
return output_path
|
| 44 |
|
|
|
|
| 45 |
# Build and start the web UI.
# Live mode is disabled: with live=True Gradio re-runs `generate` whenever any
# input changes, including when only one of the two uploads is present, and
# each run is a full inference pass. With live=False the Submit button
# triggers generation.
# NOTE(review): the title emoji was mis-encoded in the source ("β‘"); it is
# restored here as "⚡" — confirm against the original file.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio (any format)"),
    ],
    outputs=gr.Video(label="Generated Talking Video"),
    title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
    description="Upload an image and audio. This version uses ffmpeg for resampling. Runs on free CPU tier.",
    live=False,
).launch()
|