mich123geb committed on
Commit
c5790ed
·
verified ·
1 Parent(s): 68c26cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -14
app.py CHANGED
@@ -3,28 +3,31 @@ import os
3
  import subprocess
4
  import uuid
5
  from PIL import Image
6
- import soundfile as sf
 
7
 
8
- # ✅ Download model if not found (public mirror)
9
  if not os.path.exists("wav2lip_gan.pth"):
10
  os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
11
 
12
- # ✅ Downscale image and audio to reduce memory and time
13
  def preprocess(image, audio_path):
14
  uid = str(uuid.uuid4())
15
- image_path = f"{uid}_face.jpg"
16
  audio_out_path = f"{uid}_audio.wav"
 
17
 
18
- # Resize image to 256 height (keep aspect ratio)
19
  image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
20
  image.save(image_path)
21
 
22
- # Downsample audio to 16kHz mono to reduce load
23
- data, samplerate = sf.read(audio_path)
24
- sf.write(audio_out_path, data, 16000) # 16kHz
25
 
26
- return image_path, audio_out_path, f"{uid}_output.mp4"
27
 
 
28
  def generate(image, audio_file):
29
  image_path, audio_path, output_path = preprocess(image, audio_file)
30
 
@@ -39,14 +42,15 @@ def generate(image, audio_file):
39
 
40
  return output_path
41
 
 
42
  gr.Interface(
43
  fn=generate,
44
  inputs=[
45
- gr.Image(type="pil", label="Image"),
46
- gr.Audio(type="filepath", label="Audio (WAV only)")
47
  ],
48
- outputs=gr.Video(label="Talking Video"),
49
- title="⚡ Wav2Lip Fast (CPU Optimized)",
50
- description="Lip-sync image & audio with lightweight preprocessing.",
51
  live=True
52
  ).launch()
 
3
  import subprocess
4
  import uuid
5
  from PIL import Image
6
+ import librosa
7
+ import soundfile as sf # placed *after* librosa to avoid conflict
8
 
9
# ✅ Download Wav2Lip model if not present
if not os.path.exists("wav2lip_gan.pth"):
    # Use subprocess.run with an argument list instead of os.system:
    # no shell involved, and check=True surfaces a failed download
    # (os.system silently ignored wget's exit code, which could leave
    # the app running without a usable checkpoint).
    subprocess.run(
        ["wget", "https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth"],
        check=True,
    )
12
 
13
# ✅ Preprocessing to resize image + resample audio
def preprocess(image, audio_path):
    """Prepare inputs for Wav2Lip: shrink the image and resample the audio.

    Args:
        image: PIL.Image.Image containing the face to animate.
        audio_path: path to the input audio file (any format librosa can read).

    Returns:
        Tuple of (image_path, audio_out_path, output_path): the saved
        resized image, the 16 kHz mono WAV, and the path where the
        output video should be written (not created here).
    """
    # Unique prefix so concurrent requests don't overwrite each other's files.
    uid = str(uuid.uuid4())
    image_path = f"{uid}_image.jpg"
    audio_out_path = f"{uid}_audio.wav"
    output_path = f"{uid}_output.mp4"

    # Resize image to height = 256 (maintain aspect ratio).
    # Image.ANTIALIAS was removed in Pillow 10 (deprecated since 9.1);
    # Image.LANCZOS is the same resampling filter under its current name.
    image = image.resize(
        (int(image.width * 256 / image.height), 256), Image.LANCZOS
    )
    image.save(image_path)

    # Resample audio to 16 kHz mono using librosa (Wav2Lip expects 16 kHz).
    y, _ = librosa.load(audio_path, sr=16000, mono=True)
    sf.write(audio_out_path, y, 16000)

    return image_path, audio_out_path, output_path
29
 
30
+ # ✅ Main generate function
31
  def generate(image, audio_file):
32
  image_path, audio_path, output_path = preprocess(image, audio_file)
33
 
 
42
 
43
  return output_path
44
 
45
# ✅ Gradio interface
# NOTE: live=True makes Gradio re-run `fn` on every input change; with a
# ~2–4 min CPU inference per run that floods the queue with redundant jobs.
# live=False gives an explicit Submit button, which is what a heavy
# generation pipeline needs.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Audio(type="filepath", label="Upload Audio (WAV recommended)")
    ],
    outputs=gr.Video(label="Generated Talking Video"),
    title="⚡ Wav2Lip (Optimized for Hugging Face CPU)",
    description="Upload an image and audio (preferably WAV). Runs on free CPU tier. ~2–4 min per video.",
    live=False
).launch()