|
|
import os |
|
|
import torch |
|
|
import gc |
|
|
import gradio as gr |
|
|
from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer, AutoConfig |
|
|
|
|
|
|
|
|
|
|
|
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING |
|
|
from transformers.models.auto.configuration_auto import CONFIG_MAPPING |
|
|
|
|
|
|
|
|
# Display name -> Hugging Face repo id for the selectable language models
# ("brains"); the keys are the labels presented to the user in the UI.
MODELS = {


    "Dolphin-Uncensored (Fast)": "cognitivetech/Dolphin-2.9-Qwen2-0.5B",


    "Qwen-2.5 (Standard)": "Qwen/Qwen2.5-0.5B-Instruct"


}


# Hugging Face repo id of the Florence-2 vision model (the "eyes").
FLORENCE_ID = "florence-community/Florence-2-base-ft"


# Process-wide lazy-load cache, filled by load_models_on_demand():
#   "eyes"         -> {"m": model, "p": processor} for Florence-2, or None
#   "brain"        -> {"m": model, "t": tokenizer} for the active LLM, or None
#   "active_brain" -> MODELS key of the currently loaded brain, or None
storage = {"eyes": None, "brain": None, "active_brain": None}
|
|
|
|
|
def load_models_on_demand(brain_name, progress=gr.Progress()):
    """Lazily load the vision model and the requested language model.

    The Florence-2 vision model ("eyes") is loaded once and cached in the
    module-level ``storage`` dict. The language model ("brain") is loaded on
    first use and reloaded whenever a different ``brain_name`` is requested;
    the previous brain is released first so only one LLM is resident at a time.

    Args:
        brain_name: Key of ``MODELS`` naming the language model to load.
        progress: Gradio progress tracker; the ``gr.Progress()`` default is
            the standard Gradio injection pattern, not shared mutable state.

    Returns:
        Tuple ``(eyes, brain)``: ``eyes`` is ``{"m": model, "p": processor}``
        and ``brain`` is ``{"m": model, "t": tokenizer}``.

    Raises:
        KeyError: If ``brain_name`` is not a key of ``MODELS``.
    """
    # Validate up front: previously an unknown name raised KeyError only
    # AFTER the current brain had been evicted, leaving storage broken
    # (brain=None with a stale active_brain).
    if brain_name not in MODELS:
        raise KeyError(
            f"Unknown brain {brain_name!r}; expected one of {sorted(MODELS)}"
        )

    if storage["eyes"] is None:
        progress(0.2, desc="Initializing Vision (Florence-2)...")

        # NOTE(review): trust_remote_code executes code shipped with the
        # model repo at load time -- acceptable only for this pinned repo.
        config = AutoConfig.from_pretrained(FLORENCE_ID, trust_remote_code=True)

        storage["eyes"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                FLORENCE_ID,
                trust_remote_code=True,
                config=config,
                torch_dtype=torch.float32,
            ).eval(),
            "p": AutoProcessor.from_pretrained(FLORENCE_ID, trust_remote_code=True),
        }

    if storage["active_brain"] != brain_name:
        progress(0.5, desc=f"Switching Brain to {brain_name}...")

        # Drop the old brain and collect before loading the new one so peak
        # memory stays near a single model.
        storage["brain"] = None
        gc.collect()

        repo_id = MODELS[brain_name]  # hoisted: one lookup instead of two
        storage["brain"] = {
            "m": AutoModelForCausalLM.from_pretrained(
                repo_id, torch_dtype=torch.float32
            ).eval(),
            "t": AutoTokenizer.from_pretrained(repo_id),
        }
        storage["active_brain"] = brain_name

    return storage["eyes"], storage["brain"]
|
|
|
|
|
|
|
|
|