mich123geb committed on
Commit
a37c88f
·
verified ·
1 Parent(s): ff32b6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -18
app.py CHANGED
@@ -1,20 +1,31 @@
1
  import gradio as gr
2
  import os
3
- import subprocess
4
  import uuid
 
 
5
  from PIL import Image
6
- # add at the top of app.py
 
7
  try:
8
- import scipy
9
  except ImportError:
10
- os.system("pip install scipy")
11
- import scipy
12
 
13
- # ✅ Download model if not present
 
 
 
 
 
 
 
14
  if not os.path.exists("wav2lip_gan.pth"):
15
- os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
 
 
16
 
17
- def preprocess(image, audio_path):
18
  uid = str(uuid.uuid4())
19
  image_path = f"{uid}_image.jpg"
20
  audio_out_path = f"{uid}_audio.wav"
@@ -24,13 +35,9 @@ def preprocess(image, audio_path):
24
  image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
25
  image.save(image_path)
26
 
27
- # ✅ Resample audio to 16kHz mono WAV using ffmpeg
28
- ffmpeg_command = [
29
- "ffmpeg", "-i", audio_path,
30
- "-ar", "16000", "-ac", "1", # 16kHz mono
31
- "-y", audio_out_path
32
- ]
33
- subprocess.run(ffmpeg_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
34
 
35
  return image_path, audio_out_path, output_path
36
 
@@ -38,7 +45,7 @@ def generate(image, audio_file):
38
  image_path, audio_path, output_path = preprocess(image, audio_file)
39
 
40
  command = [
41
- "python", "inference.py",
42
  "--checkpoint_path", "wav2lip_gan.pth",
43
  "--face", image_path,
44
  "--audio", audio_path,
@@ -52,10 +59,10 @@ gr.Interface(
52
  fn=generate,
53
  inputs=[
54
  gr.Image(type="pil", label="Upload Image"),
55
- gr.Audio(type="filepath", label="Upload Audio (any format)")
56
  ],
57
  outputs=gr.Video(label="Generated Talking Video"),
58
  title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
59
- description="Upload an image and audio. This version uses ffmpeg for resampling. Runs on free CPU tier.",
60
  live=True
61
  ).launch()
 
import gradio as gr
import os
import sys
import uuid
import subprocess
import requests
from PIL import Image

# Safe imports: install missing audio dependencies at startup.
# NOTE(review): installing at runtime is a workaround for the Space's
# requirements.txt; prefer declaring librosa/soundfile there instead.
def _ensure(module_name, pip_name=None):
    """Import *module_name*, pip-installing *pip_name* (default: same) on failure."""
    import importlib
    try:
        return importlib.import_module(module_name)
    except ImportError:
        # Use the current interpreter's pip so the install lands in the
        # right environment; check_call raises if the install fails
        # instead of silently continuing like os.system did.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name or module_name])
        return importlib.import_module(module_name)

librosa = _ensure("librosa")
sf = _ensure("soundfile")

# ✅ Download Wav2Lip model if missing
MODEL_URL = "https://huggingface.co/spaces/justest/wav2lip-v2/resolve/main/wav2lip_gan.pth"
if not os.path.exists("wav2lip_gan.pth"):
    # Stream the (multi-hundred-MB) checkpoint to disk instead of buffering
    # it all in memory, and fail loudly on an HTTP error so a 404/HTML page
    # is never written to wav2lip_gan.pth.
    with requests.get(MODEL_URL, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open("wav2lip_gan.pth", "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
27
 
28
+ def preprocess(image, audio_file):
29
  uid = str(uuid.uuid4())
30
  image_path = f"{uid}_image.jpg"
31
  audio_out_path = f"{uid}_audio.wav"
 
35
  image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
36
  image.save(image_path)
37
 
38
+ # ✅ Resample audio using librosa (16kHz mono)
39
+ y, sr = librosa.load(audio_file, sr=16000, mono=True)
40
+ sf.write(audio_out_path, y, 16000)
 
 
 
 
41
 
42
  return image_path, audio_out_path, output_path
43
 
 
45
  image_path, audio_path, output_path = preprocess(image, audio_file)
46
 
47
  command = [
48
+ "python3", "inference.py",
49
  "--checkpoint_path", "wav2lip_gan.pth",
50
  "--face", image_path,
51
  "--audio", audio_path,
 
59
  fn=generate,
60
  inputs=[
61
  gr.Image(type="pil", label="Upload Image"),
62
+ gr.Audio(type="filepath", label="Upload Audio")
63
  ],
64
  outputs=gr.Video(label="Generated Talking Video"),
65
  title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
66
+ description="Upload an image and audio. This version uses librosa for resampling and is CPU-friendly.",
67
  live=True
68
  ).launch()