| """ |
| Models module for Personal Coach CrewAI Application |
| Handles all AI model loading and management |
| """ |
|
|
| from typing import TYPE_CHECKING, Optional, Dict, Any |
| import torch |
|
|
| |
| __version__ = "1.0.0" |
|
|
| |
| if TYPE_CHECKING: |
| from .mistral_model import MistralModel, MistralConfig, MistralPromptFormatter |
|
|
| |
| __all__ = [ |
| |
| "MistralModel", |
| "MistralConfig", |
| "MistralPromptFormatter", |
| |
| |
| "load_model", |
| "get_model_info", |
| "clear_model_cache", |
| |
| |
| "AVAILABLE_MODELS", |
| "MODEL_REQUIREMENTS", |
| "DEFAULT_MODEL_CONFIG" |
| ] |
|
|
| |
| AVAILABLE_MODELS = { |
| "mistral-7b-instruct": { |
| "model_id": "mistralai/Mistral-7B-Instruct-v0.1", |
| "type": "instruction-following", |
| "size": "7B", |
| "context_length": 32768, |
| "languages": ["multilingual"] |
| }, |
| "mistral-7b": { |
| "model_id": "mistralai/Mistral-7B-v0.1", |
| "type": "base", |
| "size": "7B", |
| "context_length": 32768, |
| "languages": ["multilingual"] |
| } |
| } |
|
|
| |
| MODEL_REQUIREMENTS = { |
| "mistral-7b-instruct": { |
| "ram": "16GB", |
| "vram": "8GB (GPU) or 16GB (CPU)", |
| "disk": "15GB", |
| "compute": "GPU recommended" |
| } |
| } |
|
|
| |
| DEFAULT_MODEL_CONFIG = { |
| "max_length": 2048, |
| "temperature": 0.7, |
| "top_p": 0.95, |
| "top_k": 50, |
| "do_sample": True, |
| "num_return_sequences": 1, |
| "device": "cuda" if torch.cuda.is_available() else "cpu", |
| "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32, |
| "load_in_8bit": False, |
| "cache_dir": ".cache/models" |
| } |
|
|
| |
| _model_cache: Dict[str, Any] = {} |
|
|
| def load_model(model_name: str = "mistral-7b-instruct", config: Optional[Dict[str, Any]] = None): |
| """ |
| Load a model with caching support |
| |
| Args: |
| model_name: Name of the model to load |
| config: Optional configuration override |
| |
| Returns: |
| Model instance |
| """ |
| |
| cache_key = f"{model_name}_{str(config)}" |
| if cache_key in _model_cache: |
| return _model_cache[cache_key] |
| |
| |
| from .mistral_model import MistralModel, MistralConfig |
| |
| |
| model_info = AVAILABLE_MODELS.get(model_name) |
| if not model_info: |
| raise ValueError(f"Unknown model: {model_name}") |
| |
| |
| model_config = DEFAULT_MODEL_CONFIG.copy() |
| if config: |
| model_config.update(config) |
| |
| |
| mistral_config = MistralConfig( |
| model_id=model_info["model_id"], |
| **model_config |
| ) |
| |
| |
| model = MistralModel(mistral_config) |
| |
| |
| _model_cache[cache_key] = model |
| |
| return model |
|
|
| def get_model_info(model_name: str) -> Optional[Dict[str, Any]]: |
| """ |
| Get information about a model |
| |
| Args: |
| model_name: Name of the model |
| |
| Returns: |
| Model information dictionary or None |
| """ |
| info = AVAILABLE_MODELS.get(model_name) |
| if info: |
| |
| requirements = MODEL_REQUIREMENTS.get(model_name, {}) |
| info["requirements"] = requirements |
| |
| |
| cache_keys = [k for k in _model_cache.keys() if k.startswith(model_name)] |
| info["is_loaded"] = len(cache_keys) > 0 |
| |
| return info |
|
|
| def clear_model_cache(model_name: Optional[str] = None): |
| """ |
| Clear model cache to free memory |
| |
| Args: |
| model_name: Specific model to clear, or None for all |
| """ |
| global _model_cache |
| |
| if model_name: |
| |
| keys_to_remove = [k for k in _model_cache.keys() if k.startswith(model_name)] |
| for key in keys_to_remove: |
| del _model_cache[key] |
| else: |
| |
| _model_cache.clear() |
| |
| |
| import gc |
| gc.collect() |
| |
| |
| if torch.cuda.is_available(): |
| torch.cuda.empty_cache() |
|
|
| |
| def estimate_memory_usage(model_name: str) -> Dict[str, Any]: |
| """ |
| Estimate memory usage for a model |
| |
| Args: |
| model_name: Name of the model |
| |
| Returns: |
| Memory estimation dictionary |
| """ |
| model_info = AVAILABLE_MODELS.get(model_name) |
| if not model_info: |
| return {} |
| |
| size = model_info.get("size", "7B") |
| size_gb = float(size.replace("B", "")) |
| |
| estimates = { |
| "model_size_gb": size_gb, |
| "fp32_memory_gb": size_gb * 4, |
| "fp16_memory_gb": size_gb * 2, |
| "int8_memory_gb": size_gb, |
| "recommended_ram_gb": size_gb * 2.5, |
| "recommended_vram_gb": size_gb * 1.5 |
| } |
| |
| return estimates |
|
|
| def get_device_info() -> Dict[str, Any]: |
| """Get information about available compute devices""" |
| info = { |
| "cuda_available": torch.cuda.is_available(), |
| "device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0, |
| "current_device": torch.cuda.current_device() if torch.cuda.is_available() else None, |
| "device_name": torch.cuda.get_device_name() if torch.cuda.is_available() else "CPU" |
| } |
| |
| if torch.cuda.is_available(): |
| info["gpu_memory"] = { |
| "allocated": torch.cuda.memory_allocated() / 1024**3, |
| "reserved": torch.cuda.memory_reserved() / 1024**3, |
| "total": torch.cuda.get_device_properties(0).total_memory / 1024**3 |
| } |
| |
| return info |
|
|
| |
| import os |
| if os.getenv("DEBUG_MODE", "false").lower() == "true": |
| print(f"Models module v{__version__} initialized") |
| device_info = get_device_info() |
| print(f"Device: {device_info['device_name']}") |
| if device_info['cuda_available']: |
| print(f"GPU Memory: {device_info['gpu_memory']['total']:.1f}GB") |