import gc
import os
import time
import traceback

import torch
from DeepCache import DeepCacheSDHelper
from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline

from shared.model_manager import ModelManager
from config import MODELS_DIR

# Let cuDNN pick the fastest kernels; bit-exact reproducibility is not required.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

os.makedirs(MODELS_DIR, exist_ok=True)


def get_quality_params(level: str) -> tuple:
    """Get lipsync parameters based on quality level.

    Args:
        level: Quality level (Fast, Normal, Medium, Best, Super Best).

    Returns:
        tuple of (num_frames, num_inference_steps, guidance_scale).
        An unrecognised level falls back to (12, 20, 1.0).
    """
    params = {
        "Fast": (12, 15, 1.0),
        "Normal": (12, 20, 1.5),
        "Medium": (16, 30, 1.5),
        "Best": (20, 40, 1.5),
        "Super Best": (24, 50, 1.5),
    }
    # NOTE(review): the fallback uses guidance_scale=1.0 whereas "Normal" uses
    # 1.5 -- confirm this difference is intentional.
    return params.get(level, (12, 20, 1.0))


def _mask_path_for(video_out_path: str) -> str:
    """Return the mask-video path that sits next to *video_out_path*.

    Only the final extension is replaced, so directory names containing
    ".mp4" are left alone and a non-.mp4 output never collides with its mask.
    """
    root, _ext = os.path.splitext(video_out_path)
    return root + "_mask.mp4"


def apply_lipsync(
    video_input_path: str,
    audio_path: str,
    video_out_path: str,
    crop_size: int = 256,
    quality_level: str = "Normal",
) -> str:
    """Run the LatentSync lipsync pipeline on a video/audio pair.

    Args:
        video_input_path: Path of the source video.
        audio_path: Path of the driving audio.
        video_out_path: Path where the pipeline writes the result.
        crop_size: Width and height passed to the pipeline.
        quality_level: Key understood by ``get_quality_params``.

    Returns:
        ``video_out_path`` once the output file exists on disk.

    Raises:
        RuntimeError: If CUDA is unavailable, the GPU runs out of memory, or
            the pipeline finishes without producing the output file.
        Exception: Any other error raised while loading models or running the
            pipeline is logged and re-raised unchanged.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("LIPSYNC START")
    print(f"Input video: {video_input_path}")
    print(f"Input audio: {audio_path}")
    print(f"Output video: {video_out_path}")
    print(f"Crop size: {crop_size}x{crop_size}")
    print(f"{banner}\n")

    try:
        # Check for a GPU before anything is moved to "cuda"; otherwise the
        # model loading below fails first with a less helpful error.
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available - GPU required for lipsync")

        manager = ModelManager.get_instance()
        config = manager.get_latentsync_config()

        vae = manager.load_vae()
        audio_encoder = manager.load_whisper_encoder(
            manager.get_whisper_model_path(config.model.cross_attention_dim),
            "cuda",
            config.data.num_frames,
        )
        unet = manager.load_latentsync_unet()
        scheduler = manager.get_scheduler()

        pipeline = LipsyncPipeline(
            vae=vae,
            audio_encoder=audio_encoder,
            unet=unet,
            scheduler=scheduler,
        ).to("cuda")

        num_frames, num_inference_steps, guidance_scale = get_quality_params(
            quality_level
        )

        print(f"\nQuality level: {quality_level}")
        print("Parameters:")
        print(f" num_frames: {num_frames}")
        print(f" num_inference_steps: {num_inference_steps}")
        print(f" guidance_scale: {guidance_scale}")
        print(f" resolution: {config.data.resolution}")
        print(f"Initial seed: {torch.initial_seed()}")

        print("\nStarting pipeline inference...")
        print(
            f"Parameters: num_frames={num_frames}, num_inference_steps={num_inference_steps}, "
            f"guidance_scale={guidance_scale}, size={crop_size}x{crop_size}"
        )

        # Enable DeepCache immediately before the guarded section so that the
        # matching disable() in ``finally`` always runs once it is enabled.
        print("Enabling DeepCache (cache_interval=3, cache_branch_id=0)...")
        deepcache_helper = DeepCacheSDHelper(pipe=pipeline)
        deepcache_helper.set_params(cache_interval=3, cache_branch_id=0)
        deepcache_helper.enable()

        try:
            with torch.no_grad():
                pipeline(
                    video_path=video_input_path,
                    audio_path=audio_path,
                    video_out_path=video_out_path,
                    video_mask_path=_mask_path_for(video_out_path),
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                    weight_dtype=torch.float16,
                    width=crop_size,
                    height=crop_size,
                )
            print("Pipeline completed successfully")
        except RuntimeError as e:
            print(f"RuntimeError in pipeline: {e}")
            if "out of memory" in str(e).lower():
                print("GPU OOM DETECTED!")
                # Chain the original error so its traceback is not lost.
                raise RuntimeError(
                    "GPU out of memory during lipsync. Try: 1) Shorter video "
                    "2) Lower resolution 3) Close other GPU apps"
                ) from e
            raise
        except Exception as e:
            # The traceback itself is printed once by the outer handler.
            print(f"Unexpected error in pipeline: {e}")
            print(f"Error type: {type(e).__name__}")
            raise
        finally:
            # Undo the DeepCache wrapping. Presumably the manager hands out
            # shared model instances, in which case leaving the wrappers in
            # place would stack them on the next call -- TODO confirm.
            deepcache_helper.disable()
            print("Clearing GPU cache...")
            # Collect first so freed tensors can be returned by empty_cache().
            gc.collect()
            torch.cuda.empty_cache()

        # Short pause before checking the filesystem for the output file.
        time.sleep(1)

        if not os.path.exists(video_out_path):
            raise RuntimeError(
                f"Pipeline succeeded but output file not created: {video_out_path}"
            )

        # Report success only after the output has been verified.
        print(f"\n{banner}")
        print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
        print(f"{banner}\n")

        return video_out_path

    except Exception as e:
        print(f"\n{banner}")
        print(f"LIPSYNC FAILED: {type(e).__name__}")
        print(f"Error: {e}")
        print(f"{banner}\n")
        traceback.print_exc()
        raise