# Source: Hugging Face Spaces - jayashree/TatTwamAI (Space status: Sleeping)
# File size: 6,081 Bytes
# Revision: 20d720d

"""
Models module for Personal Coach CrewAI Application
Handles all AI model loading and management
"""

from typing import TYPE_CHECKING, Optional, Dict, Any
import torch

# Version info
__version__ = "1.0.0"

# Lazy imports: type checkers see the model classes through this guarded
# import; at runtime TYPE_CHECKING is False, so the names are resolved on
# first access by the module-level __getattr__ defined below (PEP 562).
if TYPE_CHECKING:
    from .mistral_model import MistralModel, MistralConfig, MistralPromptFormatter

# Names that are served on demand from .mistral_model
_LAZY_EXPORTS = frozenset({"MistralModel", "MistralConfig", "MistralPromptFormatter"})


def __getattr__(name: str) -> Any:
    """
    Resolve the lazily exported model classes on first attribute access

    Without this hook the class names listed in __all__ would not exist at
    runtime, so importing them from this module (or star-importing it)
    would fail.

    Args:
        name: Attribute being looked up on this module

    Returns:
        The attribute of the same name from .mistral_model

    Raises:
        AttributeError: If name is not one of the lazily exported names
    """
    if name in _LAZY_EXPORTS:
        from . import mistral_model

        value = getattr(mistral_model, name)
        # Store the result so later lookups bypass this hook
        globals()[name] = value
        return value
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


# Public API
__all__ = [
    # Main model classes (resolved lazily, see __getattr__)
    "MistralModel",
    "MistralConfig",
    "MistralPromptFormatter",

    # Model management
    "load_model",
    "get_model_info",
    "clear_model_cache",

    # Utility functions
    "estimate_memory_usage",
    "get_device_info",

    # Constants
    "AVAILABLE_MODELS",
    "MODEL_REQUIREMENTS",
    "DEFAULT_MODEL_CONFIG",
]

# Available models, keyed by the short name accepted by the functions in this
# module. Both entries share size and context length and differ only in the
# hub model id and the model type; each entry gets its own "languages" list.
AVAILABLE_MODELS = {
    short_name: {
        "model_id": hub_id,
        "type": model_type,
        "size": "7B",
        "context_length": 32768,
        "languages": ["multilingual"],
    }
    for short_name, hub_id, model_type in (
        ("mistral-7b-instruct", "mistralai/Mistral-7B-Instruct-v0.1", "instruction-following"),
        ("mistral-7b", "mistralai/Mistral-7B-v0.1", "base"),
    )
}

# Model requirements, keyed by the same short names as AVAILABLE_MODELS.
# Only the instruct model has an entry here.
MODEL_REQUIREMENTS = {
    "mistral-7b-instruct": dict(
        ram="16GB",
        vram="8GB (GPU) or 16GB (CPU)",
        disk="15GB",
        compute="GPU recommended",
    ),
}

# Default configuration. load_model() copies this dict and layers the
# caller's overrides on top of it.
DEFAULT_MODEL_CONFIG = {
    "max_length": 2048,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 50,
    "do_sample": True,
    "num_return_sequences": 1,
    # Device and dtype are chosen together: half precision on CUDA,
    # full precision on CPU.
    **(
        {"device": "cuda", "torch_dtype": torch.float16}
        if torch.cuda.is_available()
        else {"device": "cpu", "torch_dtype": torch.float32}
    ),
    "load_in_8bit": False,
    "cache_dir": ".cache/models",
}

# Model instance cache: maps a key built by load_model() (the model name, an
# underscore, then a rendering of the config overrides) to the loaded model
# instance. get_model_info() and clear_model_cache() select entries by key prefix.
_model_cache: Dict[str, Any] = {}

def load_model(model_name: str = "mistral-7b-instruct", config: Optional[Dict[str, Any]] = None):
    """
    Load a model with caching support

    The instance is cached per (model name, config overrides). The cache key
    is built from the overrides sorted by key, so two dicts with the same
    content but different insertion order, or None versus an empty dict,
    share a single loaded instance.

    Args:
        model_name: Name of the model to load (a key of AVAILABLE_MODELS)
        config: Optional configuration override, layered on top of
            DEFAULT_MODEL_CONFIG

    Returns:
        Model instance

    Raises:
        ValueError: If model_name is not a known model
    """
    # Validate before anything else so a bad name fails without triggering
    # the model module import below
    model_info = AVAILABLE_MODELS.get(model_name)
    if not model_info:
        raise ValueError(f"Unknown model: {model_name}")

    # Canonical, order-independent rendering of the overrides. Sorting is
    # keyed on the option name only, so values never need to be comparable.
    overrides = config or {}
    canonical = sorted(overrides.items(), key=lambda item: str(item[0]))
    # Keep the "<model_name>_" prefix: the other cache helpers match on it
    cache_key = f"{model_name}_{canonical!r}"
    if cache_key in _model_cache:
        return _model_cache[cache_key]

    # Import here to avoid circular imports
    from .mistral_model import MistralModel, MistralConfig

    # Merge configurations: defaults first, caller overrides win
    model_config = DEFAULT_MODEL_CONFIG.copy()
    model_config.update(overrides)

    # Create config object
    mistral_config = MistralConfig(
        model_id=model_info["model_id"],
        **model_config
    )

    # Load model and remember it for subsequent calls
    model = MistralModel(mistral_config)
    _model_cache[cache_key] = model

    return model

def get_model_info(model_name: str) -> Optional[Dict[str, Any]]:
    """
    Get information about a model

    The returned dictionary is an independent copy of the AVAILABLE_MODELS
    entry, extended with "requirements" and "is_loaded". The module-level
    tables are not modified, so the loading status is recomputed on every
    call rather than being left behind in AVAILABLE_MODELS.

    Args:
        model_name: Name of the model

    Returns:
        Model information dictionary or None
    """
    import copy

    entry = AVAILABLE_MODELS.get(model_name)
    if not entry:
        return entry

    info = copy.deepcopy(entry)

    # Add requirements (empty dict when none are registered for this model)
    info["requirements"] = copy.deepcopy(MODEL_REQUIREMENTS.get(model_name, {}))

    # Add loading status. Cache keys have the form "<model_name>_<config>";
    # the trailing underscore stops "mistral-7b" from also matching
    # "mistral-7b-instruct" entries.
    key_prefix = f"{model_name}_"
    info["is_loaded"] = any(key.startswith(key_prefix) for key in _model_cache)

    return info

def clear_model_cache(model_name: Optional[str] = None) -> None:
    """
    Clear model cache to free memory

    Args:
        model_name: Specific model to clear, or None for all
    """
    if model_name:
        # Clear specific model. Cache keys have the form
        # "<model_name>_<config>"; the trailing underscore stops
        # "mistral-7b" from also evicting "mistral-7b-instruct" entries.
        key_prefix = f"{model_name}_"
        # Collect the keys first: the dict cannot be resized while iterating
        for key in [k for k in _model_cache if k.startswith(key_prefix)]:
            del _model_cache[key]
    else:
        # Clear all
        _model_cache.clear()

    # Force garbage collection so the dropped model objects are collected now
    import gc
    gc.collect()

    # Clear GPU cache if using CUDA
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Utility functions
def estimate_memory_usage(model_name: str) -> Dict[str, Any]:
    """
    Estimate memory usage for a model

    The figures are derived from the "size" field of the AVAILABLE_MODELS
    entry (for example "7B" becomes 7.0) multiplied by the bytes stored per
    parameter at each precision.

    Args:
        model_name: Name of the model

    Returns:
        Memory estimation dictionary, or an empty dictionary when the
        model is unknown
    """
    entry = AVAILABLE_MODELS.get(model_name)
    if entry:
        # Strip the "B" suffix to get the numeric part of the size label
        size_label = entry.get("size", "7B")
        billions = float(size_label.replace("B", ""))

        return {
            "model_size_gb": billions,
            "fp32_memory_gb": billions * 4,  # 4 bytes per parameter
            "fp16_memory_gb": billions * 2,  # 2 bytes per parameter
            "int8_memory_gb": billions,      # 1 byte per parameter
            "recommended_ram_gb": billions * 2.5,
            "recommended_vram_gb": billions * 1.5
        }

    return {}

def get_device_info() -> Dict[str, Any]:
    """
    Get information about available compute devices

    Every per-device value (name and memory figures) is reported for the
    current CUDA device, so the fields describe the same GPU even when the
    current device is not device 0.

    Returns:
        Dictionary with "cuda_available", "device_count", "current_device"
        and "device_name". When CUDA is available it also contains
        "gpu_memory" with "allocated", "reserved" and "total" in GB.
    """
    if not torch.cuda.is_available():
        return {
            "cuda_available": False,
            "device_count": 0,
            "current_device": None,
            "device_name": "CPU"
        }

    # Resolve the device index once and pass it explicitly to every query
    device_index = torch.cuda.current_device()
    bytes_per_gb = 1024**3

    return {
        "cuda_available": True,
        "device_count": torch.cuda.device_count(),
        "current_device": device_index,
        "device_name": torch.cuda.get_device_name(device_index),
        "gpu_memory": {
            "allocated": torch.cuda.memory_allocated(device_index) / bytes_per_gb,  # GB
            "reserved": torch.cuda.memory_reserved(device_index) / bytes_per_gb,    # GB
            "total": torch.cuda.get_device_properties(device_index).total_memory / bytes_per_gb  # GB
        }
    }

# Module initialization
# Optional import-time diagnostics: the block below runs only when the
# DEBUG_MODE environment variable equals "true" (case-insensitive); an unset
# variable is treated as "false".
import os
if os.getenv("DEBUG_MODE", "false").lower() == "true":
    print(f"Models module v{__version__} initialized")
    # Queried once and reused for both report lines below
    device_info = get_device_info()
    print(f"Device: {device_info['device_name']}")
    # The 'gpu_memory' entry only exists when CUDA is available
    if device_info['cuda_available']:
        print(f"GPU Memory: {device_info['gpu_memory']['total']:.1f}GB")