import gc

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from src.core.config import Config


class ModelEngine:
    """Loads every configured model once and shares the weights across roles."""

    def __init__(self):
        self.config = Config()
        self.loaded_models = {}

        # Release any leftover GPU memory before loading new weights.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()

        self._load_all_models()

    def _load_all_models(self):
        print("⚡ [Engine] Initializing Unified Architecture...")

        # Group roles by checkpoint so each model is loaded exactly once,
        # even when several roles point at the same checkpoint name.
        unique_models = {}
        for role, model_name in self.config.models.items():
            unique_models.setdefault(model_name, []).append(role)

        for model_name, roles in unique_models.items():
            print(f"  -> Loading Shared Model: {model_name}")
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                tokenizer.padding_side = "left"  # decoder-only models pad on the left
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token

                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    quantization_config=BitsAndBytesConfig(**self.config.quantization),
                    device_map="auto",
                    trust_remote_code=True,
                )

                # Every role mapped to this checkpoint shares one asset dict,
                # so the weights live in memory only once.
                asset = {"model": model, "tokenizer": tokenizer}
                for role in roles:
                    self.loaded_models[role] = asset
            except Exception as e:
                print(f"❌ Failed to load {model_name}: {e}")

    def load_model(self, role: str):
        """Return the shared {"model", "tokenizer"} asset registered for a role."""
        return self.loaded_models[role]
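
# Usage sketch, assuming Config.models maps role names to Hugging Face
# checkpoint IDs and Config.quantization holds BitsAndBytesConfig kwargs
# (both attributes appear above); the "chat" role below is hypothetical.
if __name__ == "__main__":
    engine = ModelEngine()
    asset = engine.load_model("chat")  # hypothetical role name
    tokenizer, model = asset["tokenizer"], asset["model"]

    inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=32)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))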