Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,31 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
-
import subprocess
|
| 4 |
import uuid
|
|
|
|
|
|
|
| 5 |
from PIL import Image
|
| 6 |
-
|
|
|
|
| 7 |
try:
|
| 8 |
-
import
|
| 9 |
except ImportError:
|
| 10 |
-
os.system("pip install
|
| 11 |
-
import
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
if not os.path.exists("wav2lip_gan.pth"):
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
def preprocess(image,
|
| 18 |
uid = str(uuid.uuid4())
|
| 19 |
image_path = f"{uid}_image.jpg"
|
| 20 |
audio_out_path = f"{uid}_audio.wav"
|
|
@@ -24,13 +35,9 @@ def preprocess(image, audio_path):
|
|
| 24 |
image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
|
| 25 |
image.save(image_path)
|
| 26 |
|
| 27 |
-
# ✅ Resample audio
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
"-ar", "16000", "-ac", "1", # 16kHz mono
|
| 31 |
-
"-y", audio_out_path
|
| 32 |
-
]
|
| 33 |
-
subprocess.run(ffmpeg_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 34 |
|
| 35 |
return image_path, audio_out_path, output_path
|
| 36 |
|
|
@@ -38,7 +45,7 @@ def generate(image, audio_file):
|
|
| 38 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 39 |
|
| 40 |
command = [
|
| 41 |
-
"
|
| 42 |
"--checkpoint_path", "wav2lip_gan.pth",
|
| 43 |
"--face", image_path,
|
| 44 |
"--audio", audio_path,
|
|
@@ -52,10 +59,10 @@ gr.Interface(
|
|
| 52 |
fn=generate,
|
| 53 |
inputs=[
|
| 54 |
gr.Image(type="pil", label="Upload Image"),
|
| 55 |
-
gr.Audio(type="filepath", label="Upload Audio
|
| 56 |
],
|
| 57 |
outputs=gr.Video(label="Generated Talking Video"),
|
| 58 |
title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
|
| 59 |
-
description="Upload an image and audio. This version uses
|
| 60 |
live=True
|
| 61 |
).launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
|
|
|
| 3 |
import uuid
|
| 4 |
+
import subprocess
|
| 5 |
+
import requests
|
| 6 |
from PIL import Image
|
| 7 |
+
|
| 8 |
+
# Safe imports: install missing audio dependencies at startup (common
# Hugging Face Spaces pattern when they are absent from requirements.txt).
# Installing via `sys.executable -m pip` guarantees the package lands in
# the interpreter actually running this app; a bare `pip` shell command
# (os.system) can target a different Python environment entirely.
import sys

try:
    import librosa
except ImportError:
    subprocess.run([sys.executable, "-m", "pip", "install", "librosa"])
    import librosa  # raises ImportError if the install itself failed

try:
    import soundfile as sf
except ImportError:
    subprocess.run([sys.executable, "-m", "pip", "install", "soundfile"])
    import soundfile as sf  # raises ImportError if the install itself failed
|
| 20 |
+
|
| 21 |
+
# ✅ Download the Wav2Lip GAN checkpoint on first start if it is not cached.
MODEL_URL = "https://huggingface.co/spaces/justest/wav2lip-v2/resolve/main/wav2lip_gan.pth"
if not os.path.exists("wav2lip_gan.pth"):
    # Stream the response to disk so the large checkpoint is never held
    # fully in memory, bound the request with a timeout, and fail loudly
    # on an HTTP error instead of silently saving an error page as the
    # model file (which would break inference later with a cryptic error).
    r = requests.get(MODEL_URL, stream=True, timeout=300)
    r.raise_for_status()
    with open("wav2lip_gan.pth", "wb") as f:
        for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
|
| 27 |
|
| 28 |
+
def preprocess(image, audio_file):
|
| 29 |
uid = str(uuid.uuid4())
|
| 30 |
image_path = f"{uid}_image.jpg"
|
| 31 |
audio_out_path = f"{uid}_audio.wav"
|
|
|
|
| 35 |
image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
|
| 36 |
image.save(image_path)
|
| 37 |
|
| 38 |
+
# ✅ Resample audio using librosa (16kHz mono)
|
| 39 |
+
y, sr = librosa.load(audio_file, sr=16000, mono=True)
|
| 40 |
+
sf.write(audio_out_path, y, 16000)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
return image_path, audio_out_path, output_path
|
| 43 |
|
|
|
|
| 45 |
image_path, audio_path, output_path = preprocess(image, audio_file)
|
| 46 |
|
| 47 |
command = [
|
| 48 |
+
"python3", "inference.py",
|
| 49 |
"--checkpoint_path", "wav2lip_gan.pth",
|
| 50 |
"--face", image_path,
|
| 51 |
"--audio", audio_path,
|
|
|
|
| 59 |
fn=generate,
|
| 60 |
inputs=[
|
| 61 |
gr.Image(type="pil", label="Upload Image"),
|
| 62 |
+
gr.Audio(type="filepath", label="Upload Audio")
|
| 63 |
],
|
| 64 |
outputs=gr.Video(label="Generated Talking Video"),
|
| 65 |
title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
|
| 66 |
+
description="Upload an image and audio. This version uses librosa for resampling and is CPU-friendly.",
|
| 67 |
live=True
|
| 68 |
).launch()
|