"""Download and load the YOLO detector and the Florence-2 caption model."""

import os

import gdown
import torch
from safetensors.torch import load_file  # Safetensors loading method
from transformers import AutoProcessor, AutoModelForCausalLM
from ultralytics import YOLO

# Keep gdown's cache inside the working directory so repeated runs reuse it.
# NOTE(review): gdown's documented cache lives under ~/.cache/gdown; confirm
# it actually honors a GDOWN_CACHE environment variable before relying on this.
gdown_cache_dir = os.path.join(os.getcwd(), "cache")
os.makedirs(gdown_cache_dir, exist_ok=True)
os.environ["GDOWN_CACHE"] = gdown_cache_dir  # Explicitly set GDOWN_CACHE


def download_model_from_drive(file_id, destination_path):
    """Download the model from Google Drive using gdown.

    Args:
        file_id: Google Drive file id of the weights file.
        destination_path: local path to write the downloaded file to.

    Raises:
        RuntimeError: if gdown fails to produce the file. gdown signals
            failure (quota exceeded, permission denied, bad id) by
            returning None rather than raising, so we check explicitly —
            otherwise the error would surface later as a confusing
            "file not found" when loading the weights.
    """
    # Construct the Google Drive download URL
    url = f"https://drive.google.com/uc?id={file_id}"

    # Ensure the destination directory exists (dirname is "" for bare filenames).
    directory = os.path.dirname(destination_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Download the file; gdown returns the output path on success, None on failure.
    result = gdown.download(url, destination_path, quiet=False)
    if result is None or not os.path.exists(destination_path):
        raise RuntimeError(
            f"Failed to download Google Drive file {file_id} to {destination_path}"
        )


def load_models(
    device="cpu",
    model_file_path="model.safetensors",
    file_id="1hUCqZ3X8mcM-KcwWFjcsFg7PA0hUvE3k",
):
    """Load the YOLO model and the caption generation model.

    Args:
        device: torch device string to place both models on ("cpu"/"cuda").
        model_file_path: local path of the fine-tuned caption weights
            (.safetensors); downloaded on first run if missing.
        file_id: Google Drive file id used when *model_file_path* is absent.

    Returns:
        dict with keys 'yolo_model', 'processor', 'caption_model'.
    """
    # Download the caption model weights if they are not present locally.
    if not os.path.exists(model_file_path):
        print(f"Downloading model to {model_file_path}...")
        download_model_from_drive(file_id, model_file_path)

    # Load the YOLO model (assumes "best.pt" sits in the working directory —
    # TODO confirm against deployment layout).
    print("Loading YOLO model...")
    yolo_model = YOLO("best.pt").to(device)

    # Load the processor for the caption model
    print("Loading processor for the caption model...")
    processor = AutoProcessor.from_pretrained(
        "microsoft/Florence-2-base", trust_remote_code=True
    )

    # Load the fine-tuned weights from .safetensors into the base architecture.
    print("Loading caption generation model...")
    model_state_dict = load_file(model_file_path)
    caption_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Florence-2-base", trust_remote_code=True
    )
    caption_model.load_state_dict(model_state_dict)
    caption_model.to(device)  # Move the model to the correct device

    print("Models loaded successfully!")
    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model
    }


# Usage example:
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    models = load_models(device=device)
    print("All models are ready to use!")