Spaces:
Runtime error
Runtime error
| """Lipsync processing wrapper for OutofLipSync""" | |
| import os | |
| import traceback | |
| from ffmpy import FFmpeg | |
| from lipsync import apply_lipsync | |
def get_video_info(video_path: str) -> dict:
    """Probe a video file with ffprobe and report its basic properties.

    Args:
        video_path: Path to the video file to inspect.

    Returns:
        Dict with keys ``width`` and ``height`` (taken as-is from the first
        video stream in ffprobe's JSON output), ``fps`` (the stream's
        ``r_frame_rate`` rational string converted to a float) and
        ``duration`` (the container-level ``format.duration`` converted to
        a float).

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero
            status (``check=True`` is passed to ``subprocess.run``).
    """
    import json
    import subprocess
    from fractions import Fraction

    probe = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            # Only the first video stream is inspected.
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=width,height,r_frame_rate",
            "-show_entries",
            "format=duration",
            "-of",
            "json",
            video_path,
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    report = json.loads(probe.stdout)
    stream = report["streams"][0]

    return {
        "width": stream["width"],
        "height": stream["height"],
        # r_frame_rate is parsed through Fraction (e.g. "num/den") before
        # being flattened to a float.
        "fps": float(Fraction(stream["r_frame_rate"])),
        "duration": float(report["format"]["duration"]),
    }
def apply_lipsync_to_video(
    video_path: str,
    audio_16k_path: str,
    output_dir: str,
    model_type: str = "LatentSync v1.6",
    quality_level: str = "Normal",
) -> tuple:
    """Apply lipsync to a video using clean 16 kHz audio.

    The result is written to ``output_with_lipsync.mp4`` inside
    ``output_dir`` and then probed with ``get_video_info``.

    Args:
        video_path: Path to input video.
        audio_16k_path: Path to 16kHz audio.
        output_dir: Directory to save output.
        model_type: Model type for lipsync. Only "LatentSync v1.6" is
            handled by this function; any other value raises ValueError.
        quality_level: Quality level, forwarded unchanged to
            ``apply_lipsync`` ("Fast", "Normal", "Medium", "Best",
            "Super Best").

    Returns:
        Tuple of (lipsynced_video_path, video_info), where ``video_info``
        is the dict returned by ``get_video_info`` for the output file.

    Raises:
        ValueError: If ``model_type`` is not "LatentSync v1.6".
        RuntimeError: If lipsync processing fails. Errors whose message
            mentions "out of memory" or "face not detected" are re-raised
            with a user-facing message, chained to the original error;
            any other RuntimeError is logged and re-raised unchanged.
    """
    # Reject unsupported models up front; this can never be a RuntimeError,
    # so it does not belong inside the try block below.
    if model_type != "LatentSync v1.6":
        raise ValueError(f"Unknown model_type: {model_type}")

    lipsynced_video = os.path.join(output_dir, "output_with_lipsync.mp4")
    crop_size = 512

    try:
        print(
            f"Using LatentSync: video={video_path}, audio={audio_16k_path}, crop_size={crop_size}, quality={quality_level}"
        )
        apply_lipsync(
            video_path, audio_16k_path, lipsynced_video, crop_size, quality_level
        )

        video_info = get_video_info(lipsynced_video)
        print(
            f"Lipsynced video: {lipsynced_video}, size: {video_info['width']}x{video_info['height']}"
        )
        return lipsynced_video, video_info
    except RuntimeError as e:
        message = str(e).lower()
        if "out of memory" in message:
            print("GPU OOM Error in lipsync processing!")
            # Chain the original error so the underlying cause stays visible.
            raise RuntimeError(
                "GPU out of memory during lipsync. Try a shorter video or lower resolution."
            ) from e
        if "face not detected" in message:
            raise RuntimeError(
                "Face detection failed in lipsync pipeline. Please upload a video with a clear, visible face."
            ) from e
        print(f"Runtime Error in lipsync processing: {e}")
        traceback.print_exc()
        raise