import gc
import os
import time
import traceback

import torch

from DeepCache import DeepCacheSDHelper
from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
from shared.model_manager import ModelManager
from config import MODELS_DIR

# Global cuDNN settings, applied once at import time for the whole process:
# let cuDNN benchmark the available convolution algorithms and keep the
# fastest one, and do not restrict it to deterministic algorithms.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# Create the models directory up front (no error if it already exists).
os.makedirs(MODELS_DIR, exist_ok=True)


def get_quality_params(level: str) -> tuple:
    """Look up the lipsync inference settings for a quality preset.

    Args:
        level: Preset name, matched exactly: "Fast", "Normal", "Medium",
            "Best" or "Super Best".

    Returns:
        tuple of (num_frames, num_inference_steps, guidance_scale).
        An unrecognised name yields (12, 20, 1.0).
    """
    fallback = (12, 20, 1.0)
    presets = {
        "Fast": (12, 15, 1.0),
        "Normal": (12, 20, 1.5),
        "Medium": (16, 30, 1.5),
        "Best": (20, 40, 1.5),
        "Super Best": (24, 50, 1.5),
    }
    try:
        return presets[level]
    except KeyError:
        return fallback


def _mask_output_path(video_out_path):
    """Return the path for the mask video written next to *video_out_path*."""
    root, ext = os.path.splitext(video_out_path)
    # Insert "_mask" before the extension only, so ".mp4" inside a directory
    # name is untouched and a non-.mp4 output never maps onto itself.
    return f"{root}_mask{ext or '.mp4'}"


def apply_lipsync(
    video_input_path, audio_path, video_out_path, crop_size=256, quality_level="Normal"
):
    """Run the lipsync pipeline on a video/audio pair and write the result.

    Loads the VAE, Whisper audio encoder, UNet and scheduler through the
    shared ModelManager, builds a LipsyncPipeline on the GPU, enables
    DeepCache for the duration of the run, and invokes the pipeline in
    float16 under ``torch.no_grad()``.

    Args:
        video_input_path: Path of the source video.
        audio_path: Path of the audio track to sync to.
        video_out_path: Path the pipeline writes the output video to. A mask
            video is written next to it with "_mask" before the extension.
        crop_size: Width and height passed to the pipeline.
        quality_level: Preset name resolved by ``get_quality_params``.

    Returns:
        ``video_out_path`` once the output file exists on disk.

    Raises:
        RuntimeError: If CUDA is unavailable, the GPU runs out of memory, or
            the pipeline finishes without creating the output file.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    print(f"\n{'=' * 60}")
    print("LIPSYNC START")
    print(f"Input video: {video_input_path}")
    print(f"Input audio: {audio_path}")
    print(f"Output video: {video_out_path}")
    print(f"Crop size: {crop_size}x{crop_size}")
    print(f"{'=' * 60}\n")

    try:
        # Fail fast with a clear message before anything is moved to "cuda".
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available - GPU required for lipsync")

        manager = ModelManager.get_instance()

        config = manager.get_latentsync_config()
        vae = manager.load_vae()
        audio_encoder = manager.load_whisper_encoder(
            manager.get_whisper_model_path(config.model.cross_attention_dim),
            "cuda",
            config.data.num_frames,
        )
        unet = manager.load_latentsync_unet()
        scheduler = manager.get_scheduler()

        pipeline = LipsyncPipeline(
            vae=vae,
            audio_encoder=audio_encoder,
            unet=unet,
            scheduler=scheduler,
        ).to("cuda")

        print("Enabling DeepCache (cache_interval=3, cache_branch_id=0)...")
        deepcache_helper = DeepCacheSDHelper(pipe=pipeline)
        deepcache_helper.set_params(cache_interval=3, cache_branch_id=0)
        deepcache_helper.enable()

        try:
            num_frames, num_inference_steps, guidance_scale = get_quality_params(
                quality_level
            )

            print(f"\nQuality level: {quality_level}")
            print("Parameters:")
            print(f"  num_frames: {num_frames}")
            print(f"  num_inference_steps: {num_inference_steps}")
            print(f"  guidance_scale: {guidance_scale}")
            print(f"  resolution: {config.data.resolution}")

            print(f"Initial seed: {torch.initial_seed()}")

            print("\nStarting pipeline inference...")
            print(
                f"Parameters: num_frames={num_frames}, num_inference_steps={num_inference_steps}, "
                f"guidance_scale={guidance_scale}, size={crop_size}x{crop_size}"
            )

            with torch.no_grad():
                pipeline(
                    video_path=video_input_path,
                    audio_path=audio_path,
                    video_out_path=video_out_path,
                    video_mask_path=_mask_output_path(video_out_path),
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                    weight_dtype=torch.float16,
                    width=crop_size,
                    height=crop_size,
                )
            print("Pipeline completed successfully")

        except RuntimeError as e:
            print(f"RuntimeError in pipeline: {e}")
            if "out of memory" in str(e).lower():
                print("GPU OOM DETECTED!")
                # Replace the raw CUDA error with actionable advice, keeping
                # the original exception attached as the cause.
                raise RuntimeError(
                    "GPU out of memory during lipsync. Try: 1) Shorter video 2) Lower resolution 3) Close other GPU apps"
                ) from e
            raise
        except Exception as e:
            # The traceback is printed once by the outer handler below.
            print(f"Unexpected error in pipeline: {e}")
            print(f"Error type: {type(e).__name__}")
            raise
        finally:
            # Undo the DeepCache wrapping so it does not persist on models
            # the manager may hand out again (presumably cached - confirm).
            deepcache_helper.disable()
            print("Clearing GPU cache...")
            # Collect first so tensors freed by the GC can actually be
            # returned to the driver by empty_cache().
            gc.collect()
            torch.cuda.empty_cache()

        # NOTE(review): presumably gives the writer time to finish flushing
        # the file before the existence check - confirm this is still needed.
        time.sleep(1)
        if not os.path.exists(video_out_path):
            raise RuntimeError(
                f"Pipeline succeeded but output file not created: {video_out_path}"
            )

        # Report success only once the output file is confirmed on disk.
        print(f"\n{'=' * 60}")
        print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
        print(f"{'=' * 60}\n")

        return video_out_path

    except Exception as e:
        print(f"\n{'=' * 60}")
        print(f"LIPSYNC FAILED: {type(e).__name__}")
        print(f"Error: {e}")
        print(f"{'=' * 60}\n")
        traceback.print_exc()
        raise