Spaces:
Paused
Paused
File size: 1,633 Bytes
2e91995 1804a7a 2e91995 1804a7a 2e91995 1804a7a 2e91995 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from src.core.config import Config
class ModelEngine:
    """Loads and shares quantized causal-LM models across configured roles.

    Roles that are configured to use the same underlying model share a single
    loaded model/tokenizer pair instead of loading it once per role.
    """

    def __init__(self):
        self.config = Config()
        # role name -> {"model": ..., "tokenizer": ...}
        self.loaded_models = {}
        # Free as much GPU / host memory as possible before loading weights.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        self._load_all_models()

    def _load_all_models(self):
        """Load each distinct model once and map it to every role that uses it.

        A failure to load one model is reported and skipped so the remaining
        models still load; roles served by the failed model stay unmapped and
        ``load_model`` raises an informative KeyError for them.
        """
        print("⚡ [Engine] Initializing Unified Architecture...")
        # Group roles by model name so shared models are loaded only once.
        unique_models = {}
        for role, model_name in self.config.models.items():
            unique_models.setdefault(model_name, []).append(role)
        for model_name, roles in unique_models.items():
            print(f" -> Loading Shared Model: {model_name}")
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                # Left padding is required for correct batched generation
                # with decoder-only (causal) models.
                tokenizer.padding_side = "left"
                if tokenizer.pad_token is None:
                    # Many causal LMs ship without a pad token; reuse EOS.
                    tokenizer.pad_token = tokenizer.eos_token
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    quantization_config=BitsAndBytesConfig(**self.config.quantization),
                    device_map="auto",
                    # NOTE(review): executes code from the model repo —
                    # only use with trusted model sources.
                    trust_remote_code=True,
                )
                # One shared asset dict is assigned to every role the
                # model serves.
                asset = {"model": model, "tokenizer": tokenizer}
                for role in roles:
                    self.loaded_models[role] = asset
            except Exception as e:
                # Best-effort: report and continue loading the other models.
                print(f"❌ Failed to load {model_name}: {e}")

    def load_model(self, role: str):
        """Return the preloaded ``{"model", "tokenizer"}`` asset for *role*.

        Raises:
            KeyError: if the role is unknown or its model failed to load.
        """
        try:
            return self.loaded_models[role]
        except KeyError:
            # Same exception type as the plain lookup (backward-compatible),
            # but with an actionable message instead of just the key.
            raise KeyError(
                f"No model loaded for role '{role}'. "
                f"Available roles: {sorted(self.loaded_models)}"
            ) from None
|