mich123geb committed on
Commit
c12b434
Β·
verified Β·
1 Parent(s): 2ae66d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -38
app.py CHANGED
@@ -4,10 +4,8 @@ import subprocess
4
  from pathlib import Path
5
 
6
  import gradio as gr
7
- import numpy as np
8
  from PIL import Image
9
  from pydub import AudioSegment
10
- import face_alignment
11
 
12
  # ──────────────────────────────────────────────
13
  # 1. Download Wav2Lip model checkpoint
@@ -19,34 +17,7 @@ if not MODEL_PATH.exists():
19
  os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
20
 
21
  # ──────────────────────────────────────────────
22
- # 2. Face detection setup
23
- # ──────────────────────────────────────────────
24
- fa = face_alignment.FaceAlignment("2D", flip_input=False, device='cpu')
25
-
26
-
27
- def crop_face(image: Image.Image) -> Image.Image:
28
- img_np = np.array(image)
29
- preds = fa.get_landmarks(img_np)
30
-
31
- if preds is None or len(preds) == 0:
32
- raise ValueError("No face detected.")
33
-
34
- landmarks = preds[0]
35
- x1, y1 = landmarks.min(axis=0).astype(int)
36
- x2, y2 = landmarks.max(axis=0).astype(int)
37
-
38
- # Add padding
39
- pad_top, pad_bottom, pad_side = 20, 40, 30
40
- x1 = max(0, x1 - pad_side)
41
- x2 = min(img_np.shape[1], x2 + pad_side)
42
- y1 = max(0, y1 - pad_top)
43
- y2 = min(img_np.shape[0], y2 + pad_bottom)
44
-
45
- face_crop = image.crop((x1, y1, x2, y2))
46
- return face_crop
47
-
48
- # ──────────────────────────────────────────────
49
- # 3. Preprocess image and audio
50
  # ──────────────────────────────────────────────
51
  def preprocess(image, audio_file):
52
  if image is None or audio_file is None:
@@ -57,8 +28,7 @@ def preprocess(image, audio_file):
57
  wav_path = f"{uid}.wav"
58
  out_path = f"{uid}_result.mp4"
59
 
60
- cropped_face = crop_face(image)
61
- cropped_face.save(img_path)
62
 
63
  seg = AudioSegment.from_file(audio_file)
64
  seg = seg.set_frame_rate(16000).set_channels(1)
@@ -67,7 +37,7 @@ def preprocess(image, audio_file):
67
  return img_path, wav_path, out_path
68
 
69
  # ──────────────────────────────────────────────
70
- # 4. Main inference function
71
  # ──────────────────────────────────────────────
72
  def generate(image, audio):
73
  try:
@@ -85,7 +55,8 @@ def generate(image, audio):
85
  "--outfile", out_vid,
86
  "--resize_factor", "1",
87
  "--pads", "0", "20", "0", "0",
88
- "--fps", "25"
 
89
  ],
90
  check=True,
91
  )
@@ -95,17 +66,17 @@ def generate(image, audio):
95
  return out_vid if Path(out_vid).exists() else "❌ Generation failed."
96
 
97
  # ──────────────────────────────────────────────
98
- # 5. Gradio interface
99
  # ──────────────────────────────────────────────
100
  demo = gr.Interface(
101
  fn=generate,
102
  inputs=[
103
- gr.Image(type="pil", label="Image (one face only)"),
104
  gr.Audio(type="filepath", label="Audio (any format)")
105
  ],
106
  outputs=gr.Video(label="Talking-head MP4"),
107
- title="πŸ—£οΈ High-Quality Wav2Lip with Face Cropping",
108
- description="Automatically crops the face before lip-syncing for better quality output.",
109
  allow_flagging="never",
110
  live=True,
111
  )
 
4
  from pathlib import Path
5
 
6
  import gradio as gr
 
7
  from PIL import Image
8
  from pydub import AudioSegment
 
9
 
10
  # ──────────────────────────────────────────────
11
  # 1. Download Wav2Lip model checkpoint
 
17
  os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
18
 
19
  # ──────────────────────────────────────────────
20
+ # 2. Preprocess image and audio (no cropping)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # ──────────────────────────────────────────────
22
  def preprocess(image, audio_file):
23
  if image is None or audio_file is None:
 
28
  wav_path = f"{uid}.wav"
29
  out_path = f"{uid}_result.mp4"
30
 
31
+ image.save(img_path)
 
32
 
33
  seg = AudioSegment.from_file(audio_file)
34
  seg = seg.set_frame_rate(16000).set_channels(1)
 
37
  return img_path, wav_path, out_path
38
 
39
  # ──────────────────────────────────────────────
40
+ # 3. Main inference function
41
  # ──────────────────────────────────────────────
42
  def generate(image, audio):
43
  try:
 
55
  "--outfile", out_vid,
56
  "--resize_factor", "1",
57
  "--pads", "0", "20", "0", "0",
58
+ "--fps", "25",
59
+ "--nosmooth"
60
  ],
61
  check=True,
62
  )
 
66
  return out_vid if Path(out_vid).exists() else "❌ Generation failed."
67
 
68
  # ──────────────────────────────────────────────
69
+ # 4. Gradio interface
70
  # ──────────────────────────────────────────────
71
  demo = gr.Interface(
72
  fn=generate,
73
  inputs=[
74
+ gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
75
  gr.Audio(type="filepath", label="Audio (any format)")
76
  ],
77
  outputs=gr.Video(label="Talking-head MP4"),
78
+ title="πŸ—£οΈ High-Quality Wav2Lip (No Crop, Full Image)",
79
+ description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
80
  allow_flagging="never",
81
  live=True,
82
  )