Spaces:

jayashree
/

TatTwamAI

Sleeping

Jayashree Sridhar

First Version

20d720d 11 months ago

6.08 kB

	"""
	Models module for Personal Coach CrewAI Application
	Handles all AI model loading and management
	"""

	from typing import TYPE_CHECKING, Optional, Dict, Any
	import torch

	# Version info
	__version__ = "1.0.0"

	# Lazy imports
	if TYPE_CHECKING:
	from .mistral_model import MistralModel, MistralConfig, MistralPromptFormatter

	# Public API
	__all__ = [
	# Main model classes
	"MistralModel",
	"MistralConfig",
	"MistralPromptFormatter",

	# Model management
	"load_model",
	"get_model_info",
	"clear_model_cache",

	# Constants
	"AVAILABLE_MODELS",
	"MODEL_REQUIREMENTS",
	"DEFAULT_MODEL_CONFIG"
	]

	# Available models
	AVAILABLE_MODELS = {
	"mistral-7b-instruct": {
	"model_id": "mistralai/Mistral-7B-Instruct-v0.1",
	"type": "instruction-following",
	"size": "7B",
	"context_length": 32768,
	"languages": ["multilingual"]
	},
	"mistral-7b": {
	"model_id": "mistralai/Mistral-7B-v0.1",
	"type": "base",
	"size": "7B",
	"context_length": 32768,
	"languages": ["multilingual"]
	}
	}

	# Model requirements
	MODEL_REQUIREMENTS = {
	"mistral-7b-instruct": {
	"ram": "16GB",
	"vram": "8GB (GPU) or 16GB (CPU)",
	"disk": "15GB",
	"compute": "GPU recommended"
	}
	}

	# Default configuration
	DEFAULT_MODEL_CONFIG = {
	"max_length": 2048,
	"temperature": 0.7,
	"top_p": 0.95,
	"top_k": 50,
	"do_sample": True,
	"num_return_sequences": 1,
	"device": "cuda" if torch.cuda.is_available() else "cpu",
	"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
	"load_in_8bit": False,
	"cache_dir": ".cache/models"
	}

	# Model instance cache
	_model_cache: Dict[str, Any] = {}

	def load_model(model_name: str = "mistral-7b-instruct", config: Optional[Dict[str, Any]] = None):
	"""
	Load a model with caching support

	Args:
	model_name: Name of the model to load
	config: Optional configuration override

	Returns:
	Model instance
	"""
	# Check cache first
	cache_key = f"{model_name}_{str(config)}"
	if cache_key in _model_cache:
	return _model_cache[cache_key]

	# Import here to avoid circular imports
	from .mistral_model import MistralModel, MistralConfig

	# Get model info
	model_info = AVAILABLE_MODELS.get(model_name)
	if not model_info:
	raise ValueError(f"Unknown model: {model_name}")

	# Merge configurations
	model_config = DEFAULT_MODEL_CONFIG.copy()
	if config:
	model_config.update(config)

	# Create config object
	mistral_config = MistralConfig(
	model_id=model_info["model_id"],
	**model_config
	)

	# Load model
	model = MistralModel(mistral_config)

	# Cache it
	_model_cache[cache_key] = model

	return model

	def get_model_info(model_name: str) -> Optional[Dict[str, Any]]:
	"""
	Get information about a model

	Args:
	model_name: Name of the model

	Returns:
	Model information dictionary or None
	"""
	info = AVAILABLE_MODELS.get(model_name)
	if info:
	# Add requirements
	requirements = MODEL_REQUIREMENTS.get(model_name, {})
	info["requirements"] = requirements

	# Add loading status
	cache_keys = [k for k in _model_cache.keys() if k.startswith(model_name)]
	info["is_loaded"] = len(cache_keys) > 0

	return info

	def clear_model_cache(model_name: Optional[str] = None):
	"""
	Clear model cache to free memory

	Args:
	model_name: Specific model to clear, or None for all
	"""
	global _model_cache

	if model_name:
	# Clear specific model
	keys_to_remove = [k for k in _model_cache.keys() if k.startswith(model_name)]
	for key in keys_to_remove:
	del _model_cache[key]
	else:
	# Clear all
	_model_cache.clear()

	# Force garbage collection
	import gc
	gc.collect()

	# Clear GPU cache if using CUDA
	if torch.cuda.is_available():
	torch.cuda.empty_cache()

	# Utility functions
	def estimate_memory_usage(model_name: str) -> Dict[str, Any]:
	"""
	Estimate memory usage for a model

	Args:
	model_name: Name of the model

	Returns:
	Memory estimation dictionary
	"""
	model_info = AVAILABLE_MODELS.get(model_name)
	if not model_info:
	return {}

	size = model_info.get("size", "7B")
	size_gb = float(size.replace("B", ""))

	estimates = {
	"model_size_gb": size_gb,
	"fp32_memory_gb": size_gb * 4, # 4 bytes per parameter
	"fp16_memory_gb": size_gb * 2, # 2 bytes per parameter
	"int8_memory_gb": size_gb, # 1 byte per parameter
	"recommended_ram_gb": size_gb * 2.5,
	"recommended_vram_gb": size_gb * 1.5
	}

	return estimates

	def get_device_info() -> Dict[str, Any]:
	"""Get information about available compute devices"""
	info = {
	"cuda_available": torch.cuda.is_available(),
	"device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
	"current_device": torch.cuda.current_device() if torch.cuda.is_available() else None,
	"device_name": torch.cuda.get_device_name() if torch.cuda.is_available() else "CPU"
	}

	if torch.cuda.is_available():
	info["gpu_memory"] = {
	"allocated": torch.cuda.memory_allocated() / 1024**3, # GB
	"reserved": torch.cuda.memory_reserved() / 1024**3, # GB
	"total": torch.cuda.get_device_properties(0).total_memory / 1024**3 # GB
	}

	return info

	# Module initialization
	import os
	if os.getenv("DEBUG_MODE", "false").lower() == "true":
	print(f"Models module v{__version__} initialized")
	device_info = get_device_info()
	print(f"Device: {device_info['device_name']}")
	if device_info['cuda_available']:
	print(f"GPU Memory: {device_info['gpu_memory']['total']:.1f}GB")