mich123geb committed on
Commit
c12b434
Β·
verified Β·
1 Parent(s): 2ae66d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -38
app.py CHANGED
@@ -4,10 +4,8 @@ import subprocess
4
  from pathlib import Path
5
 
6
  import gradio as gr
7
- import numpy as np
8
  from PIL import Image
9
  from pydub import AudioSegment
10
- import face_alignment
11
 
12
  # ──────────────────────────────────────────────
13
  # 1. Download Wav2Lip model checkpoint
@@ -19,34 +17,7 @@ if not MODEL_PATH.exists():
19
  os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
20
 
21
  # ──────────────────────────────────────────────
22
- # 2. Face detection setup
23
- # ──────────────────────────────────────────────
24
- fa = face_alignment.FaceAlignment("2D", flip_input=False, device='cpu')
25
-
26
-
27
- def crop_face(image: Image.Image) -> Image.Image:
28
- img_np = np.array(image)
29
- preds = fa.get_landmarks(img_np)
30
-
31
- if preds is None or len(preds) == 0:
32
- raise ValueError("No face detected.")
33
-
34
- landmarks = preds[0]
35
- x1, y1 = landmarks.min(axis=0).astype(int)
36
- x2, y2 = landmarks.max(axis=0).astype(int)
37
-
38
- # Add padding
39
- pad_top, pad_bottom, pad_side = 20, 40, 30
40
- x1 = max(0, x1 - pad_side)
41
- x2 = min(img_np.shape[1], x2 + pad_side)
42
- y1 = max(0, y1 - pad_top)
43
- y2 = min(img_np.shape[0], y2 + pad_bottom)
44
-
45
- face_crop = image.crop((x1, y1, x2, y2))
46
- return face_crop
47
-
48
- # ──────────────────────────────────────────────
49
- # 3. Preprocess image and audio
50
  # ──────────────────────────────────────────────
51
  def preprocess(image, audio_file):
52
  if image is None or audio_file is None:
@@ -57,8 +28,7 @@ def preprocess(image, audio_file):
57
  wav_path = f"{uid}.wav"
58
  out_path = f"{uid}_result.mp4"
59
 
60
- cropped_face = crop_face(image)
61
- cropped_face.save(img_path)
62
 
63
  seg = AudioSegment.from_file(audio_file)
64
  seg = seg.set_frame_rate(16000).set_channels(1)
@@ -67,7 +37,7 @@ def preprocess(image, audio_file):
67
  return img_path, wav_path, out_path
68
 
69
  # ──────────────────────────────────────────────
70
- # 4. Main inference function
71
  # ──────────────────────────────────────────────
72
  def generate(image, audio):
73
  try:
@@ -85,7 +55,8 @@ def generate(image, audio):
85
  "--outfile", out_vid,
86
  "--resize_factor", "1",
87
  "--pads", "0", "20", "0", "0",
88
- "--fps", "25"
 
89
  ],
90
  check=True,
91
  )
@@ -95,17 +66,17 @@ def generate(image, audio):
95
  return out_vid if Path(out_vid).exists() else "❌ Generation failed."
96
 
97
  # ──────────────────────────────────────────────
98
- # 5. Gradio interface
99
  # ──────────────────────────────────────────────
100
  demo = gr.Interface(
101
  fn=generate,
102
  inputs=[
103
- gr.Image(type="pil", label="Image (one face only)"),
104
  gr.Audio(type="filepath", label="Audio (any format)")
105
  ],
106
  outputs=gr.Video(label="Talking-head MP4"),
107
- title="πŸ—£οΈ High-Quality Wav2Lip with Face Cropping",
108
- description="Automatically crops the face before lip-syncing for better quality output.",
109
  allow_flagging="never",
110
  live=True,
111
  )
 
4
  from pathlib import Path
5
 
6
  import gradio as gr
 
7
  from PIL import Image
8
  from pydub import AudioSegment
 
9
 
10
  # ──────────────────────────────────────────────
11
  # 1. Download Wav2Lip model checkpoint
 
17
  os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
18
 
19
  # ──────────────────────────────────────────────
20
+ # 2. Preprocess image and audio (no cropping)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # ──────────────────────────────────────────────
22
  def preprocess(image, audio_file):
23
  if image is None or audio_file is None:
 
28
  wav_path = f"{uid}.wav"
29
  out_path = f"{uid}_result.mp4"
30
 
31
+ image.save(img_path)
 
32
 
33
  seg = AudioSegment.from_file(audio_file)
34
  seg = seg.set_frame_rate(16000).set_channels(1)
 
37
  return img_path, wav_path, out_path
38
 
39
  # ──────────────────────────────────────────────
40
+ # 3. Main inference function
41
  # ──────────────────────────────────────────────
42
  def generate(image, audio):
43
  try:
 
55
  "--outfile", out_vid,
56
  "--resize_factor", "1",
57
  "--pads", "0", "20", "0", "0",
58
+ "--fps", "25",
59
+ "--nosmooth"
60
  ],
61
  check=True,
62
  )
 
66
  return out_vid if Path(out_vid).exists() else "❌ Generation failed."
67
 
68
  # ──────────────────────────────────────────────
69
+ # 4. Gradio interface
70
  # ──────────────────────────────────────────────
71
  demo = gr.Interface(
72
  fn=generate,
73
  inputs=[
74
+ gr.Image(type="pil", label="Image (Full Resolution - Face Visible)"),
75
  gr.Audio(type="filepath", label="Audio (any format)")
76
  ],
77
  outputs=gr.Video(label="Talking-head MP4"),
78
+ title="πŸ—£οΈ High-Quality Wav2Lip (No Crop, Full Image)",
79
+ description="Lip-sync using full image resolution. Add padding under the mouth and avoid smoothing for sharper lips.",
80
  allow_flagging="never",
81
  live=True,
82
  )