"""
System Prerequisites Checker Module

This module provides functionality to check system prerequisites including:
- CUDA/GPU availability
- Environment dependencies
- Model download with progress tracking
"""

import importlib.metadata
import os
import platform
import re
import sys
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple

# The third-party imports are guarded: this module exists to *report* missing
# prerequisites, so it must stay importable when one of them is absent.
try:
    import torch
except ImportError:
    torch = None

try:
    from huggingface_hub import hf_hub_download, snapshot_download
except ImportError:
    hf_hub_download = None
    snapshot_download = None

try:
    from tqdm import tqdm
except ImportError:
    tqdm = None


class SystemChecker:
    """Check system prerequisites for MLOps platform."""

    # Packages the platform needs, mapped to their minimum version.
    REQUIRED_PACKAGES: Dict[str, str] = {
        "torch": "2.0.0",
        "transformers": "4.36.0",
        "streamlit": "1.28.0",
        "pandas": "2.0.0",
        "numpy": "1.24.0",
        "plotly": "5.18.0",
        "scikit-learn": "1.3.0",
    }

    # Leading numeric release segment of a version string ("2.1.0+cu118" -> "2.1.0").
    _RELEASE_RE = re.compile(r"^\s*v?(\d+(?:\.\d+)*)")

    def __init__(self, models_dir: str = "models"):
        """
        Initialize system checker.

        The models directory is created immediately (including parents) if it
        does not exist yet.

        Args:
            models_dir: Directory to store downloaded models
        """
        self.models_dir = Path(models_dir)
        self.models_dir.mkdir(parents=True, exist_ok=True)

    def check_cuda(self) -> Dict[str, Any]:
        """
        Check CUDA/GPU availability and information.

        Returns:
            Dict with keys ``available``, ``device_count``, ``devices``,
            ``cuda_version`` and ``cudnn_version``. Each entry of ``devices``
            holds ``id``, ``name``, ``memory_total`` (GB) and
            ``compute_capability``. When torch is not installed or CUDA is
            unavailable, ``available`` is False and the other fields keep
            their empty defaults.
        """
        result: Dict[str, Any] = {
            "available": bool(torch is not None and torch.cuda.is_available()),
            "device_count": 0,
            "devices": [],
            "cuda_version": None,
            "cudnn_version": None,
        }
        if not result["available"]:
            return result

        result["device_count"] = torch.cuda.device_count()
        result["cuda_version"] = torch.version.cuda
        result["cudnn_version"] = torch.backends.cudnn.version()

        for i in range(result["device_count"]):
            # Query the properties once per device instead of once per field.
            props = torch.cuda.get_device_properties(i)
            result["devices"].append(
                {
                    "id": i,
                    "name": torch.cuda.get_device_name(i),
                    "memory_total": props.total_memory / 1024**3,  # GB
                    "compute_capability": f"{props.major}.{props.minor}",
                }
            )
        return result

    @staticmethod
    def _parse_release(version: str) -> Tuple[int, ...]:
        """Return the leading numeric release of *version* as a tuple of ints.

        Suffixes such as ``+cu118``, ``rc1`` or ``.dev0`` are ignored. An empty
        tuple means no leading numeric release could be found.
        """
        match = SystemChecker._RELEASE_RE.match(version or "")
        if match is None:
            return ()
        return tuple(int(part) for part in match.group(1).split("."))

    @staticmethod
    def _version_satisfies(installed: str, minimum: str) -> bool:
        """Return True when *installed* is at least *minimum*.

        Only the numeric release segments are compared (so ``10.0`` > ``9.9``,
        and ``2.0`` equals ``2.0.0``). If either string cannot be parsed the
        check is lenient and reports True rather than raising a false alarm.
        """
        have = SystemChecker._parse_release(installed)
        need = SystemChecker._parse_release(minimum)
        if not have or not need:
            return True
        width = max(len(have), len(need))
        have += (0,) * (width - len(have))
        need += (0,) * (width - len(need))
        return have >= need

    def check_environment(
        self, required_packages: Optional[Dict[str, str]] = None
    ) -> Dict[str, Any]:
        """
        Check Python environment and required dependencies.

        Args:
            required_packages: Optional mapping of distribution name to
                minimum version. Defaults to ``REQUIRED_PACKAGES``.

        Returns:
            Dict with ``python_version``, ``platform``, ``architecture``,
            ``packages`` (per-package ``installed`` / ``required`` /
            ``satisfied``), ``missing_packages`` (not installed),
            ``outdated_packages`` (installed but below the minimum) and
            ``all_satisfied``.
        """
        if required_packages is None:
            required_packages = self.REQUIRED_PACKAGES

        result: Dict[str, Any] = {
            "python_version": sys.version,
            "platform": platform.platform(),
            "architecture": platform.machine(),
            "packages": {},
            "missing_packages": [],
            "outdated_packages": [],
            "all_satisfied": True,
        }

        for package, min_version in required_packages.items():
            required = f">={min_version}"
            try:
                version = importlib.metadata.version(package)
            except importlib.metadata.PackageNotFoundError:
                result["packages"][package] = {
                    "installed": None,
                    "required": required,
                    "satisfied": False,
                }
                result["missing_packages"].append(package)
                result["all_satisfied"] = False
                continue

            satisfied = self._version_satisfies(version, min_version)
            result["packages"][package] = {
                "installed": version,
                "required": required,
                "satisfied": satisfied,
            }
            if not satisfied:
                result["outdated_packages"].append(package)
                result["all_satisfied"] = False

        return result

    def _model_cache_path(self, model_name: str) -> Path:
        """Return the local directory used for *model_name* ("/" becomes "_")."""
        return self.models_dir / model_name.replace("/", "_")

    @staticmethod
    def _is_cached(path: Path) -> bool:
        """Return True when *path* is a directory containing at least one entry."""
        return path.is_dir() and any(path.iterdir())

    def download_model(
        self,
        model_name: str,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ) -> Tuple[bool, str, str]:
        """
        Download model from HuggingFace Hub to local cache.

        Any failure (invalid name, missing ``huggingface_hub``, network or
        filesystem error) is reported through the return value and the
        callback rather than raised.

        Args:
            model_name: HuggingFace model identifier (e.g., "roberta-base")
            progress_callback: Optional callback called as
                ``callback(message, fraction)`` with fraction in [0.0, 1.0]

        Returns:
            Tuple of (success: bool, model_path: str, message: str)
        """
        try:
            # An empty name would resolve to the models directory itself and
            # could be mistaken for an already-cached model.
            if not model_name or not model_name.strip():
                raise ValueError("model name must be a non-empty string")

            model_cache_path = self._model_cache_path(model_name)

            # Check if model already exists.
            # NOTE(review): any non-empty directory counts as cached, so an
            # interrupted download would also be treated as complete.
            if self._is_cached(model_cache_path):
                return (
                    True,
                    str(model_cache_path),
                    f"Model '{model_name}' already exists in cache",
                )

            if snapshot_download is None:
                raise ImportError("huggingface_hub is not installed")

            # Download model
            if progress_callback:
                progress_callback(f"Downloading {model_name}...", 0.1)

            # Use snapshot_download to get all model files.
            # NOTE(review): local_dir_use_symlinks appears to be deprecated in
            # newer huggingface_hub releases - confirm against the pinned version.
            snapshot_download(
                repo_id=model_name,
                cache_dir=str(self.models_dir),
                local_dir=str(model_cache_path),
                local_dir_use_symlinks=False,
            )

            if progress_callback:
                progress_callback(f"Downloaded {model_name} successfully", 1.0)

            return (
                True,
                str(model_cache_path),
                f"Model '{model_name}' downloaded successfully",
            )

        except Exception as e:
            # Deliberately broad: this is the boundary where every failure is
            # converted into a (False, "", message) result for the caller.
            error_msg = f"Failed to download model '{model_name}': {str(e)}"
            if progress_callback:
                progress_callback(error_msg, 0.0)
            return False, "", error_msg

    def get_model_info(self, model_name: str) -> Dict[str, Any]:
        """
        Get information about a model's local copy.

        Args:
            model_name: Model identifier

        Returns:
            Dict with ``name``, ``local_path``, ``exists_locally`` and
            ``size_mb`` (total size of all files under the local path, 0 when
            the model is not present locally).
        """
        model_cache_path = self._model_cache_path(model_name)
        info: Dict[str, Any] = {
            "name": model_name,
            "local_path": str(model_cache_path),
            "exists_locally": self._is_cached(model_cache_path),
            "size_mb": 0,
        }

        if info["exists_locally"]:
            # Calculate total size
            total_size = sum(
                f.stat().st_size for f in model_cache_path.rglob("*") if f.is_file()
            )
            info["size_mb"] = total_size / (1024 * 1024)

        return info


def format_bytes(bytes_size: float) -> str:
    """Format bytes to human-readable string.

    The unit is chosen from the magnitude, so negative sizes (e.g. deltas)
    are scaled the same way as positive ones.
    """
    for unit in ["B", "KB", "MB", "GB"]:
        if abs(bytes_size) < 1024.0:
            return f"{bytes_size:.2f} {unit}"
        bytes_size /= 1024.0
    return f"{bytes_size:.2f} TB"


def get_system_summary() -> str:
    """Get a formatted summary of system capabilities.

    Instantiates a default ``SystemChecker`` (which creates the default models
    directory) and renders its CUDA and environment reports as text.
    """
    checker = SystemChecker()
    cuda_info = checker.check_cuda()
    env_info = checker.check_environment()

    summary = []
    summary.append("=" * 60)
    summary.append("SYSTEM SUMMARY")
    summary.append("=" * 60)

    # Python & Platform
    summary.append(f"\nPython: {env_info['python_version'].split()[0]}")
    summary.append(f"Platform: {env_info['platform']}")
    summary.append(f"Architecture: {env_info['architecture']}")

    # CUDA
    summary.append(
        f"\nCUDA Available: {'Yes' if cuda_info['available'] else 'No (CPU only)'}"
    )
    if cuda_info["available"]:
        summary.append(f"CUDA Version: {cuda_info['cuda_version']}")
        summary.append(f"Number of GPUs: {cuda_info['device_count']}")
        for device in cuda_info["devices"]:
            summary.append(f" - GPU {device['id']}: {device['name']}")
            summary.append(f" Memory: {device['memory_total']:.2f} GB")
            summary.append(f" Compute: {device['compute_capability']}")

    # Packages
    summary.append(
        f"\n📦 Required Packages: {'✅ All Satisfied' if env_info['all_satisfied'] else '⚠️ Missing Packages'}"
    )
    if env_info["missing_packages"]:
        summary.append(f" Missing: {', '.join(env_info['missing_packages'])}")
    if env_info["outdated_packages"]:
        # Explain failures caused by an installed-but-too-old package.
        outdated = ", ".join(
            f"{name} ({env_info['packages'][name]['installed']}, "
            f"requires {env_info['packages'][name]['required']})"
            for name in env_info["outdated_packages"]
        )
        summary.append(f" Outdated: {outdated}")

    summary.append("=" * 60)
    return "\n".join(summary)


if __name__ == "__main__":
    # Test the system checker
    print(get_system_summary())

    checker = SystemChecker()

    # Test model download (small model for testing)
    print("\n\nTesting model download...")
    success, path, msg = checker.download_model(
        "distilbert-base-uncased",
        progress_callback=lambda msg, progress: print(
            f"Progress: {progress*100:.0f}% - {msg}"
        ),
    )
    print(f"Result: {msg}")