"""Pipeline loading and optional AoT transformer compilation for Open-Sora-v2.

Intended for a Hugging Face ZeroGPU Space: `load_pipeline` builds the
diffusers pipeline, `compile_transformer` optionally exports/compiles its
transformer via the `spaces` AoTI helpers.
"""

import torch
from diffusers import DiffusionPipeline
import spaces

from config import MODEL_ID


def load_pipeline():
    """Load and configure the Open-Sora-v2 pipeline.

    Returns:
        DiffusionPipeline: the loaded pipeline. When CUDA is available,
        model CPU offload is enabled so sub-models are streamed to the
        GPU on demand; on CPU-only hosts the pipeline stays on the CPU.

    Raises:
        Exception: re-raises whatever ``from_pretrained`` (or device
        setup) raised, after printing it, so callers see the original
        failure.
    """
    try:
        # fp16 variant halves download size and memory; safetensors avoids
        # the pickle-based weight format.
        pipeline = DiffusionPipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )

        if torch.cuda.is_available():
            # BUG FIX: the original called pipeline.to("cuda") AND
            # enable_model_cpu_offload(). Those are mutually exclusive in
            # diffusers — offload manages device placement itself via
            # accelerate hooks, and moving the whole pipeline to the GPU
            # first defeats the memory saving (diffusers warns/errors on
            # the combination). Offload alone gives the intended
            # memory-efficient behavior.
            #
            # BUG FIX: offload is now also gated on CUDA availability —
            # enable_model_cpu_offload() requires an accelerator and
            # would fail on a CPU-only host.
            pipeline.enable_model_cpu_offload()

            # Memory-efficient attention is best-effort: xformers may not
            # be installed, and it only applies on GPU anyway.
            try:
                pipeline.enable_xformers_memory_efficient_attention()
            except Exception:
                print("xformers not available, using default attention")

        return pipeline
    except Exception as e:
        print(f"Error loading pipeline: {e}")
        raise


@spaces.GPU(duration=1500)
def compile_transformer():
    """Compile the pipeline's transformer for better performance.

    Experimental: captures example transformer inputs from a throwaway
    generation, exports the transformer with ``torch.export``, compiles
    it with the Spaces AoTI helpers, and splices the compiled module
    back into the pipeline.

    Returns:
        DiffusionPipeline: the pipeline with a compiled transformer on
        success; otherwise the unoptimized pipeline (compilation failures
        are caught and logged, never propagated).
    """
    pipeline = None
    try:
        pipeline = load_pipeline()

        # Run one generation under the capture context so the real call
        # args/kwargs of the transformer are recorded for export.
        with spaces.aoti_capture(pipeline.transformer) as call:
            pipeline("test prompt generation")

        # Export with the captured example inputs, then AoT-compile.
        exported = torch.export.export(
            pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
        )
        compiled_transformer = spaces.aoti_compile(exported)

        # Replace the eager transformer's forward with the compiled one.
        spaces.aoti_apply(compiled_transformer, pipeline.transformer)

        return pipeline
    except Exception as e:
        print(f"Compilation failed, using unoptimized model: {e}")
        # BUG FIX: the original reloaded the pipeline from scratch here
        # even when loading had already succeeded and only the
        # compilation step failed — a needless multi-GB reload. Reuse
        # the loaded pipeline when we have one.
        return pipeline if pipeline is not None else load_pipeline()