Spaces:
Runtime error
Runtime error
| import gc | |
| import os | |
| import time | |
| import traceback | |
| import torch | |
| from DeepCache import DeepCacheSDHelper | |
| from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline | |
| from shared.model_manager import ModelManager | |
| from config import MODELS_DIR | |
# Make sure the model directory exists before anything tries to load from it.
os.makedirs(MODELS_DIR, exist_ok=True)

# cuDNN tuning: allow non-deterministic kernels and let cuDNN benchmark
# candidate algorithms, trading run-to-run reproducibility for speed.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
def get_quality_params(level: str) -> tuple:
    """Look up the lipsync inference settings for a named quality level.

    Args:
        level: One of "Fast", "Normal", "Medium", "Best" or "Super Best".

    Returns:
        A ``(num_frames, num_inference_steps, guidance_scale)`` tuple. Any
        level that is not listed above yields ``(12, 20, 1.0)``.
    """
    fallback = (12, 20, 1.0)
    presets = dict(
        [
            ("Fast", (12, 15, 1.0)),
            ("Normal", (12, 20, 1.5)),
            ("Medium", (16, 30, 1.5)),
            ("Best", (20, 40, 1.5)),
            ("Super Best", (24, 50, 1.5)),
        ]
    )
    try:
        return presets[level]
    except KeyError:
        return fallback
def _mask_path_for(video_out_path):
    """Return a mask-video path next to *video_out_path* that never equals it."""
    # splitext only touches the final extension, so directory names that
    # happen to contain ".mp4" are left alone and non-".mp4" outputs still
    # get a distinct "_mask" file instead of colliding with the output.
    stem, ext = os.path.splitext(video_out_path)
    return f"{stem}_mask{ext or '.mp4'}"


def apply_lipsync(
    video_input_path, audio_path, video_out_path, crop_size=256, quality_level="Normal"
):
    """Run the LatentSync lipsync pipeline on a video/audio pair.

    Args:
        video_input_path: Path of the source video, passed to the pipeline
            as ``video_path``.
        audio_path: Path of the driving audio, passed as ``audio_path``.
        video_out_path: Path the pipeline is asked to write the result to.
            A sibling ``*_mask`` path is derived from it for the mask video.
        crop_size: Value passed to the pipeline as both ``width`` and
            ``height``.
        quality_level: Key understood by ``get_quality_params``; selects
            ``num_frames``, ``num_inference_steps`` and ``guidance_scale``.

    Returns:
        ``video_out_path`` once the output file exists on disk.

    Raises:
        RuntimeError: If CUDA is unavailable, the GPU runs out of memory, or
            the pipeline finishes without producing the output file.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    print(f"\n{'=' * 60}")
    print("LIPSYNC START")
    print(f"Input video: {video_input_path}")
    print(f"Input audio: {audio_path}")
    print(f"Output video: {video_out_path}")
    print(f"Crop size: {crop_size}x{crop_size}")
    print(f"{'=' * 60}\n")

    deepcache_helper = None
    deepcache_enabled = False
    try:
        # Fail fast with a clear message, before any model is moved to the GPU.
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available - GPU required for lipsync")

        manager = ModelManager.get_instance()
        config = manager.get_latentsync_config()
        vae = manager.load_vae()
        audio_encoder = manager.load_whisper_encoder(
            manager.get_whisper_model_path(config.model.cross_attention_dim),
            "cuda",
            config.data.num_frames,
        )
        unet = manager.load_latentsync_unet()
        scheduler = manager.get_scheduler()
        pipeline = LipsyncPipeline(
            vae=vae,
            audio_encoder=audio_encoder,
            unet=unet,
            scheduler=scheduler,
        ).to("cuda")

        print("Enabling DeepCache (cache_interval=3, cache_branch_id=0)...")
        deepcache_helper = DeepCacheSDHelper(pipe=pipeline)
        deepcache_helper.set_params(cache_interval=3, cache_branch_id=0)
        deepcache_helper.enable()
        deepcache_enabled = True

        num_frames, num_inference_steps, guidance_scale = get_quality_params(
            quality_level
        )
        print(f"\nQuality level: {quality_level}")
        print("Parameters:")
        print(f" num_frames: {num_frames}")
        print(f" num_inference_steps: {num_inference_steps}")
        print(f" guidance_scale: {guidance_scale}")
        print(f" resolution: {config.data.resolution}")
        print(f"Initial seed: {torch.initial_seed()}")
        print("\nStarting pipeline inference...")
        print(
            f"Parameters: num_frames={num_frames}, num_inference_steps={num_inference_steps}, "
            f"guidance_scale={guidance_scale}, size={crop_size}x{crop_size}"
        )

        try:
            with torch.no_grad():
                pipeline(
                    video_path=video_input_path,
                    audio_path=audio_path,
                    video_out_path=video_out_path,
                    video_mask_path=_mask_path_for(video_out_path),
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                    weight_dtype=torch.float16,
                    width=crop_size,
                    height=crop_size,
                )
        except RuntimeError as e:
            print(f"RuntimeError in pipeline: {e}")
            if "out of memory" in str(e).lower():
                print("GPU OOM DETECTED!")
                # Chain the original error so the CUDA details are not lost.
                raise RuntimeError(
                    "GPU out of memory during lipsync. Try: 1) Shorter video 2) Lower resolution 3) Close other GPU apps"
                ) from e
            raise
        print("Pipeline completed successfully")

        # Short pause before checking for the output file; presumably lets
        # the video writer finish flushing - TODO confirm it is still needed.
        time.sleep(1)
        if not os.path.exists(video_out_path):
            raise RuntimeError(
                f"Pipeline succeeded but output file not created: {video_out_path}"
            )

        # Only announce success once the output file is known to exist.
        print(f"\n{'=' * 60}")
        print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
        print(f"{'=' * 60}\n")
        return video_out_path
    except Exception as e:
        print(f"\n{'=' * 60}")
        print(f"LIPSYNC FAILED: {type(e).__name__}")
        print(f"Error: {e}")
        print(f"{'=' * 60}\n")
        traceback.print_exc()
        raise
    finally:
        # Undo DeepCache's patching so a UNet that ModelManager may hand out
        # again is not wrapped a second time on the next call.
        if deepcache_enabled:
            deepcache_helper.disable()
        print("Clearing GPU cache...")
        # Collect Python garbage first so the tensors it frees can actually
        # be returned by empty_cache().
        gc.collect()
        torch.cuda.empty_cache()