import os

import gdown
import torch
from safetensors.torch import load_file  # loads a .safetensors file into a state dict
from transformers import AutoModelForCausalLM, AutoProcessor
from ultralytics import YOLO


def download_model_from_drive(file_id, destination_path):
    """Download a file from Google Drive to *destination_path* using gdown.

    Args:
        file_id: The Google Drive file ID (the ``id=`` query parameter).
        destination_path: Local path where the file is saved.
    """
    url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(url, destination_path, quiet=False)


def load_models(device='cpu'):
    """Initialize and load all required models.

    Downloads the fine-tuned caption weights from Google Drive on first run,
    then loads the YOLO detector, the Florence-2 processor, and the Florence-2
    caption model with the downloaded weights applied.

    Args:
        device: Target device string (e.g. ``'cpu'`` or ``'cuda:0'``).

    Returns:
        dict with keys ``'yolo_model'``, ``'processor'``, ``'caption_model'``.
    """
    # Keep all subsequently created tensors in float32 by default.
    torch.set_default_dtype(torch.float32)

    # Download the fine-tuned weights from Google Drive (if not already present).
    model_file_path = 'model.safetensors'
    if not os.path.exists(model_file_path):
        file_id = "1hUCqZ3X8mcM-KcwWFjcsFg7PA0hUvE3k"  # Replace with your Google Drive file ID
        download_model_from_drive(file_id, model_file_path)

    # Load the YOLO detection model (expects 'best.pt' in the working directory).
    yolo_model = YOLO('best.pt').to(device)

    # Load the Florence-2 processor for input preparation.
    processor = AutoProcessor.from_pretrained(
        "microsoft/Florence-2-base", trust_remote_code=True
    )

    # BUG FIX: safetensors.torch.load_file has no ``framework`` parameter
    # (that keyword belongs to safetensors.safe_open), so the original call
    # raised TypeError. Additionally, load_file returns a bare state dict,
    # not a usable model. Instantiate the Florence-2 architecture and load
    # the fine-tuned weights into it instead.
    caption_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Florence-2-base", trust_remote_code=True
    )
    state_dict = load_file(model_file_path, device=device)
    # NOTE(review): assumes the downloaded checkpoint matches the Florence-2
    # base architecture exactly — confirm against how the weights were saved.
    caption_model.load_state_dict(state_dict)
    caption_model.to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model
    }