File size: 1,994 Bytes
a5f3e62
 
 
 
5abb768
 
 
 
 
 
9a790d8
bc2b055
 
 
 
605e9d1
e73ccce
9a790d8
5abb768
bc2b055
b67d7b3
a5f3e62
5abb768
bc2b055
a5f3e62
5abb768
 
 
 
bc2b055
e73ccce
 
 
5abb768
e73ccce
 
bc2b055
 
b67d7b3
5abb768
bc2b055
a5f3e62
bc2b055
5abb768
a5f3e62
bc2b055
a5f3e62
bc2b055
 
 
b67d7b3
bc2b055
b67d7b3
5abb768
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import torch
import gc
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer, AutoConfig

# --- THE CRITICAL FIX ---
# We must manually register Florence2 so AutoModelForCausalLM accepts it
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING
from transformers.models.auto.configuration_auto import CONFIG_MAPPING

# Configuration
# Maps the human-readable label shown in the UI to the Hugging Face Hub
# repo id of the corresponding small instruction-tuned language model.
MODELS: dict[str, str] = {
    "Dolphin-Uncensored (Fast)": "cognitivetech/Dolphin-2.9-Qwen2-0.5B",
    "Qwen-2.5 (Standard)": "Qwen/Qwen2.5-0.5B-Instruct"
}
# Hub repo id of the Florence-2 vision model (the "eyes").
FLORENCE_ID: str = "florence-community/Florence-2-base-ft"

# Global storage
# Module-level cache so models are loaded once and reused across requests:
#   "eyes"         -> dict with "m" (Florence model) and "p" (processor), or None
#   "brain"        -> dict with "m" (LM) and "t" (tokenizer), or None
#   "active_brain" -> MODELS key of the currently loaded brain, or None
storage = {"eyes": None, "brain": None, "active_brain": None}

def load_models_on_demand(brain_name, progress=gr.Progress()):
    """Lazily load the vision model and the selected language model ("brain").

    Both models are cached in the module-level ``storage`` dict so repeated
    calls are cheap; the brain is swapped only when ``brain_name`` changes.

    Args:
        brain_name: Key into ``MODELS`` selecting which language model to use.
        progress: Gradio progress tracker (``gr.Progress()`` as a default is
            the Gradio-documented injection pattern, not a mutable-default bug).

    Returns:
        Tuple ``(eyes, brain)`` where ``eyes`` is ``{"m": model, "p": processor}``
        and ``brain`` is ``{"m": model, "t": tokenizer}``.
    """
    # 1. Load Florence (Eyes) — loaded once, never swapped.
    if storage["eyes"] is None:
        progress(0.2, desc="Initializing Vision (Florence-2)...")

        # Loading the config first ensures the remote Florence2 config class
        # is registered before AutoModelForCausalLM tries to resolve it.
        config = AutoConfig.from_pretrained(FLORENCE_ID, trust_remote_code=True)

        storage["eyes"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                FLORENCE_ID,
                trust_remote_code=True,
                config=config,  # Pass the config explicitly
                torch_dtype=torch.float32
            ).eval(),
            "p": AutoProcessor.from_pretrained(FLORENCE_ID, trust_remote_code=True)
        }

    # 2. Load Brain (Dolphin/Qwen).
    # Also reload when storage["brain"] is None: a previous load that raised
    # mid-way must not leave a stale name that short-circuits the retry.
    if storage["active_brain"] != brain_name or storage["brain"] is None:
        progress(0.5, desc=f"Switching Brain to {brain_name}...")
        # Drop the old model AND invalidate the active marker before loading,
        # so an exception during from_pretrained cannot leave a (name, None)
        # pairing that would make the next call return a missing brain.
        storage["brain"] = None
        storage["active_brain"] = None
        gc.collect()  # Encourage release of the old model's memory first.

        storage["brain"] = {
            "m": AutoModelForCausalLM.from_pretrained(MODELS[brain_name], torch_dtype=torch.float32).eval(),
            "t": AutoTokenizer.from_pretrained(MODELS[brain_name])
        }
        storage["active_brain"] = brain_name

    return storage["eyes"], storage["brain"]

# ... (Rest of your process_request and UI code stays the same)