Spaces:
Paused
Paused
| import torch | |
| import gc | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| from src.core.config import Config | |
class ModelEngine:
    """Load and share quantized causal-LM models across configured roles.

    Roles that map to the same model name share a single model/tokenizer
    pair, so duplicate weights are never loaded into GPU memory twice.
    """

    def __init__(self):
        self.config = Config()
        # role -> {"model": ..., "tokenizer": ...}; roles that point at the
        # same model name share one asset dict (see _load_all_models).
        self.loaded_models: dict = {}
        # Free as much GPU/host memory as possible before loading weights.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        self._load_all_models()

    def _load_all_models(self):
        """Group configured roles by model name and load each model once."""
        print("⚡ [Engine] Initializing Unified Architecture...")
        # Invert role->model_name into model_name->[roles] so that shared
        # weights are loaded exactly once.
        unique_models: dict = {}
        for role, model_name in self.config.models.items():
            unique_models.setdefault(model_name, []).append(role)
        for model_name, roles in unique_models.items():
            print(f" -> Loading Shared Model: {model_name}")
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                # Left padding is required for correct batched generation
                # with decoder-only models.
                tokenizer.padding_side = "left"
                if tokenizer.pad_token is None:
                    tokenizer.pad_token = tokenizer.eos_token
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    quantization_config=BitsAndBytesConfig(**self.config.quantization),
                    device_map="auto",
                    trust_remote_code=True,
                )
                asset = {"model": model, "tokenizer": tokenizer}
                for role in roles:
                    self.loaded_models[role] = asset
            except Exception as e:
                # Best-effort: keep loading the remaining models so one bad
                # checkpoint does not take the whole engine down. Roles for
                # this model will be absent from loaded_models.
                print(f"❌ Failed to load {model_name}: {e}")

    def load_model(self, role: str):
        """Return the {"model", "tokenizer"} asset registered for *role*.

        Raises:
            KeyError: if the role is unknown or its model failed to load
                (same exception type as before, now with a useful message).
        """
        try:
            return self.loaded_models[role]
        except KeyError:
            raise KeyError(
                f"No model loaded for role '{role}'. "
                f"Available roles: {sorted(self.loaded_models)}"
            ) from None