"""Pipeline loading and optional AoT transformer compilation for Open-Sora-v2.

Intended for a Hugging Face ZeroGPU Space: `load_pipeline` builds the
diffusers pipeline, `compile_transformer` optionally exports/compiles its
transformer via the `spaces` AoTI helpers.
"""

import torch
from diffusers import DiffusionPipeline
import spaces

from config import MODEL_ID


def load_pipeline():
    """Load and configure the Open-Sora-v2 pipeline.

    Returns:
        DiffusionPipeline: the loaded pipeline. When CUDA is available,
        model CPU offload is enabled so sub-models are streamed to the
        GPU on demand; on CPU-only hosts the pipeline stays on the CPU.

    Raises:
        Exception: re-raises whatever ``from_pretrained`` (or device
        setup) raised, after printing it, so callers see the original
        failure.
    """
    try:
        # fp16 variant halves download size and memory; safetensors avoids
        # the pickle-based weight format.
        pipeline = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )

        if torch.cuda.is_available():
            # BUG FIX: the original called pipeline.to("cuda") AND
            # enable_model_cpu_offload(). Those are mutually exclusive in
            # diffusers — offload manages device placement itself via
            # accelerate hooks, and moving the whole pipeline to the GPU
            # first defeats the memory saving (diffusers warns/errors on
            # the combination). Offload alone gives the intended
            # memory-efficient behavior.
            #
            # BUG FIX: offload is now also gated on CUDA availability —
            # enable_model_cpu_offload() requires an accelerator and
            # would fail on a CPU-only host.
            pipeline.enable_model_cpu_offload()

            # Memory-efficient attention is best-effort: xformers may not
            # be installed, and it only applies on GPU anyway.
            try:
                pipeline.enable_xformers_memory_efficient_attention()
            except Exception:
                print("xformers not available, using default attention")

        return pipeline
    except Exception as e:
        print(f"Error loading pipeline: {e}")
        raise


@spaces.GPU(duration=1500)
def compile_transformer():
    """Compile the pipeline's transformer for better performance.

    Experimental: captures example transformer inputs from a throwaway
    generation, exports the transformer with ``torch.export``, compiles
    it with the Spaces AoTI helpers, and splices the compiled module
    back into the pipeline.

    Returns:
        DiffusionPipeline: the pipeline with a compiled transformer on
        success; otherwise the unoptimized pipeline (compilation failures
        are caught and logged, never propagated).
    """
    pipeline = None
    try:
        pipeline = load_pipeline()

        # Run one generation under the capture context so the real call
        # args/kwargs of the transformer are recorded for export.
        with spaces.aoti_capture(pipeline.transformer) as call:
            pipeline("test prompt generation")

        # Export with the captured example inputs, then AoT-compile.
        exported = torch.export.export(
            pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
        )
        compiled_transformer = spaces.aoti_compile(exported)

        # Replace the eager transformer's forward with the compiled one.
        spaces.aoti_apply(compiled_transformer, pipeline.transformer)

        return pipeline
    except Exception as e:
        print(f"Compilation failed, using unoptimized model: {e}")
        # BUG FIX: the original reloaded the pipeline from scratch here
        # even when loading had already succeeded and only the
        # compilation step failed — a needless multi-GB reload. Reuse
        # the loaded pipeline when we have one.
        return pipeline if pipeline is not None else load_pipeline()