"""Gradio helper that lazily loads a Florence-2 vision model ("eyes") and a
swappable causal-LM "brain" (Dolphin or Qwen), keeping at most one brain in
memory at a time."""

import os
import gc

import gradio as gr
import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoProcessor,
    AutoTokenizer,
)

# NOTE(review): these mappings were imported with the stated intent of
# manually registering Florence-2 with AutoModelForCausalLM, but no
# registration call is ever made in this file -- `trust_remote_code=True`
# below is what actually makes the Florence-2 load work. Kept so the file's
# import surface is unchanged; confirm they are unused before removing.
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING  # noqa: F401
from transformers.models.auto.configuration_auto import CONFIG_MAPPING  # noqa: F401

# Selectable "brain" checkpoints, keyed by the label shown in the UI.
MODELS = {
    "Dolphin-Uncensored (Fast)": "cognitivetech/Dolphin-2.9-Qwen2-0.5B",
    "Qwen-2.5 (Standard)": "Qwen/Qwen2.5-0.5B-Instruct",
}
FLORENCE_ID = "florence-community/Florence-2-base-ft"

# Global cache:
#   "eyes"         -> {"m": vision model, "p": processor} or None
#   "brain"        -> {"m": language model, "t": tokenizer} or None
#   "active_brain" -> label of the currently loaded brain (to skip reloads)
storage = {"eyes": None, "brain": None, "active_brain": None}


def load_models_on_demand(brain_name, progress=gr.Progress()):
    """Ensure the vision model and the requested brain are loaded.

    Florence-2 is loaded once and cached in `storage["eyes"]`; the brain is
    (re)loaded only when `brain_name` differs from the one currently in
    memory, releasing the old weights first to keep peak RAM down.

    Args:
        brain_name: key of MODELS selecting which language model to load.
        progress: Gradio progress tracker (default instance is the Gradio
            convention for progress reporting).

    Returns:
        (eyes, brain): dicts with keys "m"/"p" (vision model, processor)
        and "m"/"t" (language model, tokenizer) respectively.

    Raises:
        KeyError: if `brain_name` is not a key of MODELS.
    """
    # 1. Vision ("eyes") -- load once, then reuse across calls.
    if storage["eyes"] is None:
        progress(0.2, desc="Initializing Vision (Florence-2)...")
        # Load the config explicitly (with trust_remote_code) so the remote
        # Florence-2 model class is resolvable before the weights load.
        config = AutoConfig.from_pretrained(FLORENCE_ID, trust_remote_code=True)
        storage["eyes"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                FLORENCE_ID,
                trust_remote_code=True,
                config=config,  # pass the config explicitly
                torch_dtype=torch.float32,
            ).eval(),
            "p": AutoProcessor.from_pretrained(FLORENCE_ID, trust_remote_code=True),
        }

    # 2. Language model ("brain") -- swap only when the selection changed.
    if storage["active_brain"] != brain_name:
        progress(0.5, desc=f"Switching Brain to {brain_name}...")
        # Drop the old weights and collect before allocating the new ones,
        # so both brains are never resident at once.
        storage["brain"] = None
        gc.collect()
        storage["brain"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                MODELS[brain_name], torch_dtype=torch.float32
            ).eval(),
            "t": AutoTokenizer.from_pretrained(MODELS[brain_name]),
        }
        storage["active_brain"] = brain_name

    return storage["eyes"], storage["brain"]

# ... (Rest of your process_request and UI code stays the same)