Spaces:

banao-tech
/

model-testing

Build error

App Files Community

banao-tech committed 27 days ago

Commit

4c48c35

verified ·

1 Parent(s): 1872e4c

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -80

app.py CHANGED Viewed

@@ -5,14 +5,11 @@ from pathlib import Path
 from datetime import datetime
 import gradio as gr
 from huggingface_hub import snapshot_download
-import numpy as np
-from PIL import Image
 ROOT = Path(__file__).parent.resolve()
 REPO_DIR = ROOT / "LatentSync"
 CKPT_DIR = REPO_DIR / "checkpoints"
 TEMP_DIR = REPO_DIR / "temp"
-MASK_DIR = REPO_DIR / "latentsync" / "utils"
 # Use 1.5 on T4 16GB
 HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
@@ -21,63 +18,58 @@ def run(cmd, cwd=None):
     print(" ".join(map(str, cmd)))
     subprocess.check_call(cmd, cwd=cwd)
-def create_mask_image():
-    """
-    Create the missing mask.png file that LatentSync expects.
-    This creates a circular mask for the mouth region (lower half of face).
-    """
-    mask_path = MASK_DIR / "mask.png"
-    if mask_path.exists():
-        return  # Mask already exists
-    # Create the utils directory if it doesn't exist
-    MASK_DIR.mkdir(parents=True, exist_ok=True)
-    # Create a 256x256 mask image
-    # White (255) = area to be inpainted (mouth region)
-    # Black (0) = area to keep unchanged
-    height, width = 256, 256
-    mask = np.zeros((height, width), dtype=np.uint8)
-    # Create an elliptical mask for the lower face/mouth region
-    # This covers approximately the bottom third of the face
-    center_x, center_y = width // 2, int(height * 0.7)
-    radius_x, radius_y = int(width * 0.35), int(height * 0.25)
-    for y in range(height):
-        for x in range(width):
-            # Ellipse equation: ((x-cx)/rx)^2 + ((y-cy)/ry)^2 <= 1
-            if ((x - center_x) / radius_x) ** 2 + ((y - center_y) / radius_y) ** 2 <= 1:
-                mask[y, x] = 255
-    # Save the mask
-    mask_img = Image.fromarray(mask, mode='L')
-    mask_img.save(str(mask_path))
-    print(f"Created mask image at {mask_path}")
 def setup():
-    # Clone LatentSync repo at runtime
     if not REPO_DIR.exists():
         run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
     CKPT_DIR.mkdir(parents=True, exist_ok=True)
     TEMP_DIR.mkdir(parents=True, exist_ok=True)
-    # Create the missing mask.png file
-    create_mask_image()
-    # Download all checkpoint files
     snapshot_download(
         repo_id=HF_CKPT_REPO,
         local_dir=str(CKPT_DIR),
         local_dir_use_symlinks=False,
     )
 def make_still_video(image_path: str, audio_path: str, fps: int = 25) -> str:
-    """
-    Create a video by looping the avatar image for the length of the audio.
-    LatentSync expects a VIDEO input.
-    """
     out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
     cmd = [
         "ffmpeg", "-y",
@@ -99,19 +91,20 @@ def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
         setup()
         if avatar_img is None:
-            return None, "Please upload an avatar image!"
         if audio_wav is None:
-            return None, "Please upload an audio file!"
         img_path = str(Path(avatar_img).resolve())
         wav_path = str(Path(audio_wav).resolve())
-        # Make a temp mp4 from the single image + audio
         video_path = make_still_video(img_path, wav_path, fps=25)
         out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
-        # Use correct config path for LatentSync 1.5
         cmd = [
             "python", "-m", "scripts.inference",
             "--unet_config_path", "configs/unet/stage2.yaml",
@@ -128,21 +121,22 @@ def generate(avatar_img, audio_wav, steps, guidance, seed, use_deepcache):
         if use_deepcache:
             cmd.append("--enable_deepcache")
         run(cmd, cwd=str(REPO_DIR))
         if out_path.exists():
-            return str(out_path), "Video generated successfully!"
         else:
-            return None, "Video generation failed - output file not created"
     except subprocess.CalledProcessError as e:
-        error_msg = f"Command failed with return code {e.returncode}"
         return None, error_msg
     except Exception as e:
-        return None, f"Error: {str(e)}"
-# Gradio Interface
-with gr.Blocks(title="LatentSync - Lip Sync Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         # 🎬 LatentSync 1.5 - AI Lip Sync Generator
@@ -160,41 +154,33 @@ with gr.Blocks(title="LatentSync - Lip Sync Generator", theme=gr.themes.Soft())
         with gr.Column():
             avatar = gr.Image(
                 type="filepath",
-                label="📷 Avatar Image",
-                info="Upload a clear frontal face photo (JPG/PNG)"
             )
             audio = gr.Audio(
                 type="filepath",
-                label="🎵 Audio File",
-                format="wav",
-                info="Upload your audio (WAV format recommended)"
             )
         with gr.Column():
-            with gr.Group():
-                gr.Markdown("### ⚙️ Generation Settings")
-                steps = gr.Slider(
-                    10, 40, value=20, step=1,
-                    label="Inference Steps",
-                    info="Higher = better quality, slower"
-                )
-                guidance = gr.Slider(
-                    0.8, 2.0, value=1.0, step=0.1,
-                    label="Guidance Scale",
-                    info="Higher = better lip sync, may distort"
-                )
-                seed = gr.Number(
-                    value=1247, precision=0,
-                    label="Seed",
-                    info="For reproducible results"
-                )
-                deepcache = gr.Checkbox(
-                    value=True,
-                    label="Enable DeepCache (Faster)",
-                    info="Recommended for T4 GPU"
-                )
-    btn = gr.Button("🚀 Generate Lip-Synced Video", variant="primary", size="lg")
     status = gr.Textbox(label="Status", interactive=False)
     out = gr.Video(label="Generated Video")
@@ -218,4 +204,4 @@ with gr.Blocks(title="LatentSync - Lip Sync Generator", theme=gr.themes.Soft())
 if __name__ == "__main__":
     demo.queue(max_size=3)
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 from datetime import datetime
 import gradio as gr
 from huggingface_hub import snapshot_download
 ROOT = Path(__file__).parent.resolve()
 REPO_DIR = ROOT / "LatentSync"
 CKPT_DIR = REPO_DIR / "checkpoints"
 TEMP_DIR = REPO_DIR / "temp"
 # Use 1.5 on T4 16GB
 HF_CKPT_REPO = "ByteDance/LatentSync-1.5"
     print(" ".join(map(str, cmd)))
     subprocess.check_call(cmd, cwd=cwd)
+def create_mask_file():
+    """Create the missing mask.png file"""
+    mask_dir = REPO_DIR / "latentsync" / "utils"
+    mask_path = mask_dir / "mask.png"
+    if mask_path.exists():
+        return
+    mask_dir.mkdir(parents=True, exist_ok=True)
+    # Create mask using numpy and PIL
+    try:
+        import numpy as np
+        from PIL import Image
+        # Create 256x256 mask (white = inpaint mouth area, black = keep)
+        mask = np.zeros((256, 256), dtype=np.uint8)
+        # Create ellipse for mouth region (lower face)
+        center_x, center_y = 128, 180
+        for y in range(256):
+            for x in range(256):
+                # Ellipse: ((x-cx)/rx)^2 + ((y-cy)/ry)^2 <= 1
+                if ((x - center_x) / 90) ** 2 + ((y - center_y) / 64) ** 2 <= 1:
+                    mask[y, x] = 255
+        Image.fromarray(mask, mode='L').save(str(mask_path))
+        print(f"✓ Created mask at {mask_path}")
+    except Exception as e:
+        print(f"Warning: Could not create mask: {e}")
 def setup():
     if not REPO_DIR.exists():
+        print("Cloning LatentSync repository...")
         run(["git", "clone", "--depth", "1", "https://github.com/bytedance/LatentSync.git", str(REPO_DIR)])
     CKPT_DIR.mkdir(parents=True, exist_ok=True)
     TEMP_DIR.mkdir(parents=True, exist_ok=True)
+    # Create mask file before running inference
+    create_mask_file()
+    # Download checkpoints
+    print("Downloading model checkpoints...")
     snapshot_download(
         repo_id=HF_CKPT_REPO,
         local_dir=str(CKPT_DIR),
         local_dir_use_symlinks=False,
     )
+    print("✓ Setup complete")
 def make_still_video(image_path: str, audio_path: str, fps: int = 25) -> str:
+    """Convert static image + audio to video"""
     out_path = TEMP_DIR / f"still_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
     cmd = [
         "ffmpeg", "-y",
         setup()
         if avatar_img is None:
+            return None, "❌ Please upload an avatar image!"
         if audio_wav is None:
+            return None, "❌ Please upload an audio file!"
         img_path = str(Path(avatar_img).resolve())
         wav_path = str(Path(audio_wav).resolve())
+        # Create video from image + audio
+        print("Creating input video...")
         video_path = make_still_video(img_path, wav_path, fps=25)
         out_path = TEMP_DIR / f"result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
+        # Fixed config path for LatentSync 1.5
         cmd = [
             "python", "-m", "scripts.inference",
             "--unet_config_path", "configs/unet/stage2.yaml",
         if use_deepcache:
             cmd.append("--enable_deepcache")
+        print("Generating lip-synced video...")
         run(cmd, cwd=str(REPO_DIR))
         if out_path.exists():
+            return str(out_path), "✅ Video generated successfully!"
         else:
+            return None, "❌ Video generation failed - output file not created"
     except subprocess.CalledProcessError as e:
+        error_msg = f"❌ Command failed with return code {e.returncode}"
         return None, error_msg
     except Exception as e:
+        return None, f"❌ Error: {str(e)}"
+# Gradio Interface - Compatible with Gradio 4.44.1
+with gr.Blocks(title="LatentSync Lip Sync") as demo:
     gr.Markdown(
         """
         # 🎬 LatentSync 1.5 - AI Lip Sync Generator
         with gr.Column():
             avatar = gr.Image(
                 type="filepath",
+                label="📷 Avatar Image (JPG/PNG)"
             )
             audio = gr.Audio(
                 type="filepath",
+                label="🎵 Audio File (WAV)"
             )
         with gr.Column():
+            gr.Markdown("### ⚙️ Generation Settings")
+            steps = gr.Slider(
+                10, 40, value=20, step=1,
+                label="Inference Steps (Higher = Better Quality)"
+            )
+            guidance = gr.Slider(
+                0.8, 2.0, value=1.0, step=0.1,
+                label="Guidance Scale (Higher = Stronger Lip Sync)"
+            )
+            seed = gr.Number(
+                value=1247, precision=0,
+                label="Seed (For Reproducibility)"
+            )
+            deepcache = gr.Checkbox(
+                value=True,
+                label="Enable DeepCache (Faster - Recommended for T4)"
+            )
+    btn = gr.Button("🚀 Generate Lip-Synced Video", variant="primary")
     status = gr.Textbox(label="Status", interactive=False)
     out = gr.Video(label="Generated Video")
 if __name__ == "__main__":
     demo.queue(max_size=3)
+    demo.launch()