"""Gradio helper that lazily loads a Florence-2 vision model ("eyes") and a
swappable causal-LM "brain" (Dolphin or Qwen), keeping at most one brain in
memory at a time."""

import os
import gc

import gradio as gr
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoProcessor,
    AutoTokenizer,
)

# NOTE(review): these mappings were imported with the stated intent of
# manually registering Florence-2 with AutoModelForCausalLM, but no
# registration call is ever made in this file -- `trust_remote_code=True`
# below is what actually makes the Florence-2 load work. Kept so the file's
# import surface is unchanged; confirm they are unused before removing.
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING  # noqa: F401
from transformers.models.auto.configuration_auto import CONFIG_MAPPING  # noqa: F401

# Selectable "brain" checkpoints, keyed by the label shown in the UI.
MODELS = {
    "Dolphin-Uncensored (Fast)": "cognitivetech/Dolphin-2.9-Qwen2-0.5B",
    "Qwen-2.5 (Standard)": "Qwen/Qwen2.5-0.5B-Instruct",
}
FLORENCE_ID = "florence-community/Florence-2-base-ft"

# Global cache:
#   "eyes"         -> {"m": vision model, "p": processor} or None
#   "brain"        -> {"m": language model, "t": tokenizer} or None
#   "active_brain" -> label of the currently loaded brain (to skip reloads)
storage = {"eyes": None, "brain": None, "active_brain": None}


def load_models_on_demand(brain_name, progress=gr.Progress()):
    """Ensure the vision model and the requested brain are loaded.

    Florence-2 is loaded once and cached in `storage["eyes"]`; the brain is
    (re)loaded only when `brain_name` differs from the one currently in
    memory, releasing the old weights first to keep peak RAM down.

    Args:
        brain_name: key of MODELS selecting which language model to load.
        progress: Gradio progress tracker (default instance is the Gradio
            convention for progress reporting).

    Returns:
        (eyes, brain): dicts with keys "m"/"p" (vision model, processor)
        and "m"/"t" (language model, tokenizer) respectively.

    Raises:
        KeyError: if `brain_name` is not a key of MODELS.
    """
    # 1. Vision ("eyes") -- load once, then reuse across calls.
    if storage["eyes"] is None:
        progress(0.2, desc="Initializing Vision (Florence-2)...")
        # Load the config explicitly (with trust_remote_code) so the remote
        # Florence-2 model class is resolvable before the weights load.
        config = AutoConfig.from_pretrained(FLORENCE_ID, trust_remote_code=True)
        storage["eyes"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                FLORENCE_ID,
                trust_remote_code=True,
                config=config,  # pass the config explicitly
                torch_dtype=torch.float32,
            ).eval(),
            "p": AutoProcessor.from_pretrained(FLORENCE_ID, trust_remote_code=True),
        }

    # 2. Language model ("brain") -- swap only when the selection changed.
    if storage["active_brain"] != brain_name:
        progress(0.5, desc=f"Switching Brain to {brain_name}...")
        # Drop the old weights and collect before allocating the new ones,
        # so both brains are never resident at once.
        storage["brain"] = None
        gc.collect()
        storage["brain"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                MODELS[brain_name], torch_dtype=torch.float32
            ).eval(),
            "t": AutoTokenizer.from_pretrained(MODELS[brain_name]),
        }
        storage["active_brain"] = brain_name

    return storage["eyes"], storage["brain"]

# ... (Rest of your process_request and UI code stays the same)