Spaces:
Sleeping
Sleeping
| # llm_clients/shared_models.py | |
| """ | |
| Shared model manager to avoid loading the same model multiple times. | |
| This significantly improves memory usage and startup time. | |
| """ | |
| from typing import Optional, Dict, Any, Tuple | |
| import threading | |
| import os | |
| class SharedModelManager: | |
| """Singleton class to manage shared model instances""" | |
| _instance = None | |
| _lock = threading.Lock() | |
| _models: Dict[str, Any] = {} | |
| _model_components: Dict[str, Dict[str, Any]] = {} # Store actual model components | |
| def __new__(cls): | |
| if cls._instance is None: | |
| with cls._lock: | |
| if cls._instance is None: | |
| cls._instance = super().__new__(cls) | |
| return cls._instance | |
| def get_finetuned_model_components(self, model_name: str = "zazaman/fmb") -> Optional[Dict[str, Any]]: | |
| """ | |
| Get or load shared model components (model, tokenizer, classifier). | |
| Args: | |
| model_name: Name of the model to load | |
| Returns: | |
| Dict with 'model', 'tokenizer', 'classifier' components or None if loading fails | |
| """ | |
| model_key = f"finetuned_components_{model_name}" | |
| if model_key not in self._model_components: | |
| try: | |
| print(f"π Loading shared finetuned model components: {model_name}") | |
| # Import here to avoid circular imports | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
| import torch | |
| # Set up cache directory for HF Spaces compatibility | |
| if not os.getenv('HF_HOME'): | |
| cache_dir = os.path.expanduser("~/.cache/huggingface") | |
| os.environ['HF_HOME'] = cache_dir | |
| os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_dir, 'transformers') | |
| # Create cache directories if they don't exist | |
| os.makedirs(cache_dir, exist_ok=True) | |
| os.makedirs(os.path.join(cache_dir, 'transformers'), exist_ok=True) | |
| print(f" π Using cache directory: {cache_dir}") | |
| # Apply optimizations | |
| torch._dynamo.config.suppress_errors = True | |
| torch._dynamo.config.disable = True | |
| os.environ["TORCH_COMPILE_DISABLE"] = "1" | |
| os.environ["TORCHDYNAMO_DISABLE"] = "1" | |
| os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0" | |
| print(f" π₯ Downloading model from Hugging Face: {model_name}") | |
| # Load model and tokenizer with explicit cache directory | |
| model = AutoModelForSequenceClassification.from_pretrained( | |
| model_name, | |
| torch_dtype=torch.float32, | |
| device_map=None, | |
| cache_dir=os.environ.get('TRANSFORMERS_CACHE'), | |
| local_files_only=False, # Allow downloading | |
| trust_remote_code=False # Security best practice | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained( | |
| model_name, | |
| cache_dir=os.environ.get('TRANSFORMERS_CACHE'), | |
| local_files_only=False, | |
| trust_remote_code=False | |
| ) | |
| # Disable compilation | |
| if hasattr(model, '_compiler_config'): | |
| model._compiler_config = None | |
| # Move to CPU | |
| device = "cpu" | |
| model = model.to(device) | |
| print(f" π§ Creating classifier pipeline...") | |
| # Create classifier pipeline | |
| classifier = pipeline( | |
| "text-classification", | |
| model=model, | |
| tokenizer=tokenizer, | |
| device=device, | |
| framework="pt", | |
| torch_dtype=torch.float32 | |
| ) | |
| # Store components | |
| self._model_components[model_key] = { | |
| "model": model, | |
| "tokenizer": tokenizer, | |
| "classifier": classifier, | |
| "device": device, | |
| "model_name": model_name | |
| } | |
| print(f"β Shared finetuned model components loaded successfully: {model_name}") | |
| print(f" Device: {device}") | |
| print(f" Cache: {os.environ.get('TRANSFORMERS_CACHE', 'default')}") | |
| except PermissionError as e: | |
| print(f"β Permission error loading model {model_name}: {e}") | |
| print(f" This might be a cache directory issue in the deployment environment.") | |
| print(f" Suggestion: Check HF_HOME and cache directory permissions.") | |
| self._model_components[model_key] = None | |
| return None | |
| except Exception as e: | |
| print(f"β Failed to load shared finetuned model components {model_name}: {e}") | |
| print(f" Error type: {type(e).__name__}") | |
| if "connection" in str(e).lower() or "network" in str(e).lower(): | |
| print(f" This appears to be a network issue. Check internet connectivity.") | |
| elif "disk" in str(e).lower() or "space" in str(e).lower(): | |
| print(f" This appears to be a disk space issue.") | |
| self._model_components[model_key] = None | |
| return None | |
| return self._model_components[model_key] | |
| def get_finetuned_guard_client(self, model_name: str = "zazaman/fmb") -> Optional[Any]: | |
| """ | |
| Get or create a shared FinetunedGuardClient instance that uses shared model components. | |
| Args: | |
| model_name: Name of the model to load | |
| Returns: | |
| FinetunedGuardClient instance or None if loading fails | |
| """ | |
| model_key = f"finetuned_guard_{model_name}" | |
| if model_key not in self._models: | |
| try: | |
| # Get shared model components | |
| components = self.get_finetuned_model_components(model_name) | |
| if not components: | |
| return None | |
| from .finetuned_guard import FinetunedGuardClient | |
| print(f" π Creating FinetunedGuardClient with shared model components: {model_name}") | |
| model_config = { | |
| "model_name": model_name | |
| } | |
| # Create client that will use shared components | |
| client = FinetunedGuardClient(model_config, "", shared_components=components) | |
| self._models[model_key] = client | |
| print(f"β Shared finetuned guard client created successfully: {model_name}") | |
| except Exception as e: | |
| print(f"β Failed to create shared finetuned guard client {model_name}: {e}") | |
| self._models[model_key] = None | |
| return None | |
| return self._models[model_key] | |
| def clear_models(self): | |
| """Clear all cached models (useful for testing)""" | |
| self._models.clear() | |
| self._model_components.clear() | |
| def get_model_info(self) -> Dict[str, bool]: | |
| """Get information about loaded models""" | |
| return { | |
| model_key: model is not None | |
| for model_key, model in self._models.items() | |
| } | |
| # Global singleton instance | |
| shared_model_manager = SharedModelManager() |