mich123geb committed on
Commit
5b78679
·
verified ·
1 Parent(s): cc5790a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -3,31 +3,31 @@ import os
3
  import subprocess
4
  import uuid
5
  from PIL import Image
6
- import librosa
7
- import soundfile as sf # placed *after* librosa to avoid conflict
8
 
9
- # ✅ Download Wav2Lip model if not present
10
  if not os.path.exists("wav2lip_gan.pth"):
11
  os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
12
 
13
- # ✅ Preprocessing to resize image + resample audio
14
  def preprocess(image, audio_path):
15
  uid = str(uuid.uuid4())
16
  image_path = f"{uid}_image.jpg"
17
  audio_out_path = f"{uid}_audio.wav"
18
  output_path = f"{uid}_output.mp4"
19
 
20
- # Resize image to height = 256 (maintain aspect ratio)
21
  image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
22
  image.save(image_path)
23
 
24
- # Resample audio to 16kHz mono using librosa
25
- y, sr = librosa.load(audio_path, sr=16000, mono=True)
26
- sf.write(audio_out_path, y, 16000)
 
 
 
 
27
 
28
  return image_path, audio_out_path, output_path
29
 
30
- # ✅ Main generate function
31
  def generate(image, audio_file):
32
  image_path, audio_path, output_path = preprocess(image, audio_file)
33
 
@@ -42,15 +42,14 @@ def generate(image, audio_file):
42
 
43
  return output_path
44
 
45
- # ✅ Gradio interface
46
  gr.Interface(
47
  fn=generate,
48
  inputs=[
49
  gr.Image(type="pil", label="Upload Image"),
50
- gr.Audio(type="filepath", label="Upload Audio (WAV recommended)")
51
  ],
52
  outputs=gr.Video(label="Generated Talking Video"),
53
  title="⚑ Wav2Lip (Optimized for Hugging Face CPU)",
54
- description="Upload an image and audio (preferably WAV). Runs on free CPU tier. ~2–4 min per video.",
55
  live=True
56
  ).launch()
 
3
  import subprocess
4
  import uuid
5
  from PIL import Image
 
 
6
 
7
+ # ✅ Download model if not present
8
  if not os.path.exists("wav2lip_gan.pth"):
9
  os.system("wget https://www.adrianbulat.com/downloads/wav2lip/wav2lip_gan.pth")
10
 
 
11
  def preprocess(image, audio_path):
12
  uid = str(uuid.uuid4())
13
  image_path = f"{uid}_image.jpg"
14
  audio_out_path = f"{uid}_audio.wav"
15
  output_path = f"{uid}_output.mp4"
16
 
17
+ # ✅ Resize image
18
  image = image.resize((int(image.width * 256 / image.height), 256), Image.ANTIALIAS)
19
  image.save(image_path)
20
 
21
+ # ✅ Resample audio to 16kHz mono WAV using ffmpeg
22
+ ffmpeg_command = [
23
+ "ffmpeg", "-i", audio_path,
24
+ "-ar", "16000", "-ac", "1", # 16kHz mono
25
+ "-y", audio_out_path
26
+ ]
27
+ subprocess.run(ffmpeg_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
28
 
29
  return image_path, audio_out_path, output_path
30
 
 
31
  def generate(image, audio_file):
32
  image_path, audio_path, output_path = preprocess(image, audio_file)
33
 
 
42
 
43
  return output_path
44
 
 
45
  gr.Interface(
46
  fn=generate,
47
  inputs=[
48
  gr.Image(type="pil", label="Upload Image"),
49
+ gr.Audio(type="filepath", label="Upload Audio (any format)")
50
  ],
51
  outputs=gr.Video(label="Generated Talking Video"),
52
  title="⚑ Wav2Lip (Optimized for Hugging Face CPU)",
53
+ description="Upload an image and audio. This version uses ffmpeg for resampling. Runs on free CPU tier.",
54
  live=True
55
  ).launch()