""" Sheikh-Kitty Monitoring Dashboard Real-time system monitoring and visualization Features: - System resource monitoring (CPU, memory, disk) - API performance metrics - Security alerts display - Execution history tracking - Health status indicators Author: MiniMax Agent Date: 2025-11-14 """ import json import time import psutil from datetime import datetime, timedelta from pathlib import Path from typing import Dict, List, Any, Optional from dataclasses import asdict import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class SimpleDashboard: """Simple terminal-based dashboard for monitoring""" def __init__(self, log_dir: str = "logs"): self.log_dir = Path(log_dir) self.log_dir.mkdir(exist_ok=True) self.state_file = self.log_dir / "dashboard_state.json" # System thresholds self.thresholds = { 'cpu_warning': 70.0, 'cpu_critical': 90.0, 'memory_warning': 75.0, 'memory_critical': 90.0, 'disk_warning': 80.0, 'disk_critical': 95.0 } def get_system_status(self) -> Dict[str, Any]: """Get current system status""" try: # CPU usage cpu_percent = psutil.cpu_percent(interval=1) cpu_count = psutil.cpu_count() # Memory usage memory = psutil.virtual_memory() # Disk usage disk = psutil.disk_usage('/') # Load average (Unix systems) try: load_avg = psutil.getloadavg()[0] if hasattr(psutil, 'getloadavg') else 0.0 except AttributeError: load_avg = 0.0 return { 'timestamp': datetime.now().isoformat(), 'cpu': { 'usage_percent': cpu_percent, 'count': cpu_count, 'load_average': load_avg, 'status': self._get_status_level(cpu_percent, 'cpu') }, 'memory': { 'usage_percent': memory.percent, 'available_gb': memory.available / (1024**3), 'total_gb': memory.total / (1024**3), 'status': self._get_status_level(memory.percent, 'memory') }, 'disk': { 'usage_percent': (disk.used / disk.total) * 100, 'free_gb': disk.free / (1024**3), 'total_gb': disk.total / (1024**3), 'status': self._get_status_level((disk.used / disk.total) * 100, 'disk') } } except Exception as e: logger.error(f"Failed to get system status: {e}") return {} def _get_status_level(self, value: float, resource_type: str) -> str: """Determine status level based on thresholds""" if resource_type == 'cpu': if value >= self.thresholds['cpu_critical']: return 'critical' elif value >= self.thresholds['cpu_warning']: return 'warning' elif resource_type == 'memory': if value >= self.thresholds['memory_critical']: return 'critical' elif value >= self.thresholds['memory_warning']: return 'warning' elif resource_type == 'disk': if value >= self.thresholds['disk_critical']: return 'critical' elif value >= self.thresholds['disk_warning']: return 'warning' return 'healthy' def get_api_metrics(self) -> Dict[str, Any]: """Get API metrics from log files""" try: api_log = self.log_dir / "api_requests.jsonl" if not api_log.exists(): return {} # Read recent API requests recent_requests = [] with open(api_log, 'r') as f: for line in f: try: request = json.loads(line.strip()) recent_requests.append(request) except json.JSONDecodeError: continue # Filter requests from last hour one_hour_ago = datetime.now() - timedelta(hours=1) recent_requests = [ req for req in recent_requests if datetime.fromisoformat(req['timestamp']) > one_hour_ago ] if not recent_requests: return {} # Calculate metrics execution_times = [req['execution_time'] for req in recent_requests] successes = [req['response_data']['success'] for req in recent_requests] return { 'total_requests': len(recent_requests), 'successful_requests': sum(successes), 'success_rate': sum(successes) / len(successes) if successes else 0, 'average_execution_time': sum(execution_times) / len(execution_times) if execution_times else 0, 'p95_execution_time': sorted(execution_times)[int(len(execution_times) * 0.95)] if execution_times else 0, 'endpoints': { req['endpoint']: { 'count': 1, 'success': req['response_data']['success'] } for req in recent_requests } } except Exception as e: logger.error(f"Failed to get API metrics: {e}") return {} def get_alerts(self) -> List[Dict[str, Any]]: """Get recent alerts""" try: alerts_file = self.log_dir / "alerts.jsonl" if not alerts_file.exists(): return [] alerts = [] with open(alerts_file, 'r') as f: for line in f: try: alert = json.loads(line.strip()) alerts.append(alert) except json.JSONDecodeError: continue # Return recent alerts (last 24 hours) one_day_ago = datetime.now() - timedelta(days=1) recent_alerts = [ alert for alert in alerts if datetime.fromisoformat(alert['timestamp']) > one_day_ago ] return sorted(recent_alerts, key=lambda x: x['timestamp'], reverse=True)[:10] except Exception as e: logger.error(f"Failed to get alerts: {e}") return [] def display_dashboard(self): """Display dashboard in terminal""" # Clear screen (ANSI escape code) print("\033[2J\033[H") print("=" * 60) print("šŸ—ļø SHEIKH-KITTY MONITORING DASHBOARD") print("=" * 60) print(f"šŸ“… {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print() # System Status system_status = self.get_system_status() if system_status: print("šŸ–„ļø SYSTEM STATUS") print("-" * 20) # CPU cpu = system_status['cpu'] status_icon = self._get_status_icon(cpu['status']) print(f"{status_icon} CPU: {cpu['usage_percent']:6.1f}% (Cores: {cpu['count']}, Load: {cpu['load_average']:.2f})") # Memory memory = system_status['memory'] status_icon = self._get_status_icon(memory['status']) print(f"{status_icon} Memory: {memory['usage_percent']:6.1f}% (Available: {memory['available_gb']:.1f}GB)") # Disk disk = system_status['disk'] status_icon = self._get_status_icon(disk['status']) print(f"{status_icon} Disk: {disk['usage_percent']:6.1f}% (Free: {disk['free_gb']:.1f}GB)") print() # API Metrics api_metrics = self.get_api_metrics() if api_metrics: print("🌐 API METRICS (Last Hour)") print("-" * 25) print(f"šŸ“Š Requests: {api_metrics['total_requests']}") print(f"āœ… Success: {api_metrics['successful_requests']} ({api_metrics['success_rate']:.1%})") print(f"ā±ļø Avg Time: {api_metrics['average_execution_time']:.3f}s") print(f"šŸš€ P95 Time: {api_metrics['p95_execution_time']:.3f}s") # Endpoint breakdown if api_metrics['endpoints']: print("šŸ”— Endpoints:") for endpoint, stats in api_metrics['endpoints'].items(): print(f" {endpoint}: {stats['count']} requests, {stats['success']:.1%} success") print() # Recent Alerts alerts = self.get_alerts() if alerts: print("🚨 RECENT ALERTS") print("-" * 15) for alert in alerts[:5]: # Show last 5 alerts severity_icon = self._get_alert_icon(alert['severity']) print(f"{severity_icon} {alert['severity'].upper()}: {alert['message']}") print(f" šŸ“… {alert['timestamp']}") print() # Health Summary print("šŸ’š SYSTEM HEALTH") print("-" * 15) health_score = self._calculate_health_score(system_status, api_metrics, alerts) health_status = self._get_health_status(health_score) print(f"Overall: {health_status} ({health_score:.1%})") print() print("Press Ctrl+C to exit") def _get_status_icon(self, status: str) -> str: """Get icon for status""" icons = { 'healthy': '🟢', 'warning': '🟔', 'critical': 'šŸ”“' } return icons.get(status, '⚪') def _get_alert_icon(self, severity: str) -> str: """Get icon for alert severity""" icons = { 'info': 'ā„¹ļø', 'warning': 'āš ļø', 'error': 'āŒ', 'critical': '🚨' } return icons.get(severity, 'šŸ“¢') def _calculate_health_score(self, system_status: Dict, api_metrics: Dict, alerts: List) -> float: """Calculate overall health score""" score = 1.0 # Deduct for system resource issues if system_status: if system_status['cpu']['status'] == 'warning': score -= 0.1 elif system_status['cpu']['status'] == 'critical': score -= 0.2 if system_status['memory']['status'] == 'warning': score -= 0.1 elif system_status['memory']['status'] == 'critical': score -= 0.2 if system_status['disk']['status'] == 'warning': score -= 0.1 elif system_status['disk']['status'] == 'critical': score -= 0.2 # Deduct for API issues if api_metrics: success_rate = api_metrics.get('success_rate', 1.0) if success_rate < 0.95: score -= (0.95 - success_rate) # Deduct for recent alerts recent_critical_alerts = sum(1 for alert in alerts if alert['severity'] == 'critical') if recent_critical_alerts > 0: score -= min(0.3, recent_critical_alerts * 0.1) return max(0.0, score) def _get_health_status(self, score: float) -> str: """Get health status text""" if score >= 0.9: return "Excellent" elif score >= 0.8: return "Good" elif score >= 0.7: return "Fair" elif score >= 0.5: return "Poor" else: return "Critical" def save_dashboard_state(self): """Save current dashboard state""" try: state = { 'timestamp': datetime.now().isoformat(), 'system_status': self.get_system_status(), 'api_metrics': self.get_api_metrics(), 'alerts': self.get_alerts() } with open(self.state_file, 'w') as f: json.dump(state, f, indent=2) except Exception as e: logger.error(f"Failed to save dashboard state: {e}") def run_continuous_monitoring(self, update_interval: int = 30): """Run continuous dashboard monitoring""" try: while True: self.display_dashboard() self.save_dashboard_state() time.sleep(update_interval) except KeyboardInterrupt: print("\nšŸ‘‹ Monitoring dashboard stopped") except Exception as e: logger.error(f"Dashboard error: {e}") def main(): """Main dashboard execution""" import argparse parser = argparse.ArgumentParser(description="Sheikh-Kitty Monitoring Dashboard") parser.add_argument('--interval', type=int, default=30, help='Update interval in seconds') parser.add_argument('--once', action='store_true', help='Display once and exit') args = parser.parse_args() dashboard = SimpleDashboard() if args.once: dashboard.display_dashboard() else: print("Starting Sheikh-Kitty monitoring dashboard...") print("Press Ctrl+C to exit") dashboard.run_continuous_monitoring(args.interval) if __name__ == "__main__": main()