import torch
from transformers import (
AutoModel,
AutoTokenizer,
AutoModelForSequenceClassification,
Qwen2VLForConditionalGeneration,
AutoProcessor,
)
from gliner import GLiNER
from rag.config import Settings
from rag.logging_utils import get_logger
logger = get_logger(__name__)
class Models:
    """Bundle of every model the RAG pipeline needs, loaded once at startup.

    Holds the GLiNER router (CPU), the embedding model + tokenizer, the
    cross-encoder reranker + tokenizer, and the Qwen2-VL vision/generation
    model + processor. All loaded models are put in eval mode.
    """

    def __init__(self, settings: Settings):
        self.settings = settings
        # Load each model group in turn; order matches the original layout.
        self._load_router()
        self._load_embedder()
        self._load_reranker()
        self._load_vision()

    def _load_router(self) -> None:
        # Small NER-style router kept on CPU so the GPU stays free for the
        # heavier models below.
        cfg = self.settings
        logger.info("🧠 Loading GLiNER router on CPU: %s", cfg.router_model_id)
        self.router_model = GLiNER.from_pretrained(cfg.router_model_id).to("cpu")
        self.router_model.eval()

    def _load_embedder(self) -> None:
        # Embedding model in bf16, auto-sharded across available devices.
        cfg = self.settings
        logger.info("🔹 Loading embedder: %s", cfg.embed_model_id)
        self.embed_tokenizer = AutoTokenizer.from_pretrained(
            cfg.embed_model_id, trust_remote_code=False
        )
        self.embed_model = AutoModel.from_pretrained(
            cfg.embed_model_id,
            trust_remote_code=False,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.embed_model.eval()

    def _load_reranker(self) -> None:
        # Cross-encoder reranker (sequence-classification head), bf16.
        cfg = self.settings
        logger.info("⚖️ Loading reranker: %s", cfg.rerank_model_id)
        self.rerank_tokenizer = AutoTokenizer.from_pretrained(cfg.rerank_model_id)
        self.rerank_model = AutoModelForSequenceClassification.from_pretrained(
            cfg.rerank_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.rerank_model.eval()

    def _load_vision(self) -> None:
        # Qwen2-VL conditional-generation model plus its multimodal processor.
        cfg = self.settings
        logger.info("👁️ Loading vision model: %s", cfg.gen_model_id)
        self.gen_model = Qwen2VLForConditionalGeneration.from_pretrained(
            cfg.gen_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.gen_model.eval()
        self.gen_processor = AutoProcessor.from_pretrained(cfg.gen_model_id)
def load_models(settings: Settings | None = None) -> Models:
    """Construct a fully-loaded :class:`Models` bundle.

    Falls back to a fresh default ``Settings()`` when *settings* is None.
    """
    return Models(settings if settings is not None else Settings())
|