Spaces:
Sleeping
Sleeping
| """ | |
| Sheikh-Kitty Monitoring Dashboard | |
| Real-time system monitoring and visualization | |
| Features: | |
| - System resource monitoring (CPU, memory, disk) | |
| - API performance metrics | |
| - Security alerts display | |
| - Execution history tracking | |
| - Health status indicators | |
| Author: MiniMax Agent | |
| Date: 2025-11-14 | |
| """ | |
| import json | |
| import time | |
| import psutil | |
| from datetime import datetime, timedelta | |
| from pathlib import Path | |
| from typing import Dict, List, Any, Optional | |
| from dataclasses import asdict | |
| import logging | |
# Configure the root logger at import time with INFO as the minimum level.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; SimpleDashboard reports its
# errors through it.
logger = logging.getLogger(__name__)
class SimpleDashboard:
    """Simple terminal-based dashboard for monitoring.

    Reads host resource usage through ``psutil`` and two JSON-lines files in
    ``log_dir``:

    * ``api_requests.jsonl`` -- one object per request; the keys read here are
      ``timestamp``, ``endpoint``, ``execution_time`` and
      ``response_data.success``.
    * ``alerts.jsonl`` -- one object per alert; the keys read here are
      ``timestamp``, ``severity`` and ``message``.

    The latest snapshot is written to ``dashboard_state.json`` in ``log_dir``.
    """

    def __init__(self, log_dir: str = "logs"):
        """Create the log directory if needed and set resource thresholds.

        Args:
            log_dir: Directory holding the input logs and the state file.
        """
        self.log_dir = Path(log_dir)
        # parents=True so a nested path such as "var/run/logs" works too.
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.state_file = self.log_dir / "dashboard_state.json"

        # Usage percentages at which a resource becomes "warning"/"critical".
        # Keys follow the "<resource>_<level>" pattern used by
        # _get_status_level().
        self.thresholds = {
            'cpu_warning': 70.0,
            'cpu_critical': 90.0,
            'memory_warning': 75.0,
            'memory_critical': 90.0,
            'disk_warning': 80.0,
            'disk_critical': 95.0
        }

    @staticmethod
    def _read_jsonl(path: Path) -> List[Dict[str, Any]]:
        """Return every JSON object found in *path*, one per line.

        Blank lines, lines that are not valid JSON and JSON values that are
        not objects are skipped so one bad line cannot hide the rest.
        """
        records: List[Dict[str, Any]] = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                text = raw_line.strip()
                if not text:
                    continue
                try:
                    record = json.loads(text)
                except json.JSONDecodeError:
                    continue
                if isinstance(record, dict):
                    records.append(record)
        return records

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Parse an ISO-8601 string into a naive local ``datetime``.

        Returns ``None`` when *value* is not a string or cannot be parsed.
        Timezone-aware values are converted to local time and stripped of
        their tzinfo so they can be compared with ``datetime.now()``.
        """
        if not isinstance(value, str):
            return None
        # Older Python versions reject the "Z" suffix in fromisoformat().
        if value.endswith('Z'):
            value = value[:-1] + '+00:00'
        try:
            parsed = datetime.fromisoformat(value)
        except ValueError:
            return None
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone().replace(tzinfo=None)
        return parsed

    def get_system_status(self) -> Dict[str, Any]:
        """Get current system status.

        Samples CPU usage over a one-second interval, so this call blocks for
        about one second.

        Returns:
            A dict with ``timestamp``, ``cpu``, ``memory`` and ``disk``
            entries, or an empty dict if collection failed (the error is
            logged).
        """
        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            cpu_count = psutil.cpu_count()
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage('/')

            # Load average is not available on every platform / psutil build.
            try:
                load_avg = psutil.getloadavg()[0]
            except (AttributeError, OSError):
                load_avg = 0.0

            # Compute once; guard against a zero-sized filesystem.
            disk_percent = (disk.used / disk.total) * 100 if disk.total else 0.0

            return {
                'timestamp': datetime.now().isoformat(),
                'cpu': {
                    'usage_percent': cpu_percent,
                    'count': cpu_count,
                    'load_average': load_avg,
                    'status': self._get_status_level(cpu_percent, 'cpu')
                },
                'memory': {
                    'usage_percent': memory.percent,
                    'available_gb': memory.available / (1024**3),
                    'total_gb': memory.total / (1024**3),
                    'status': self._get_status_level(memory.percent, 'memory')
                },
                'disk': {
                    'usage_percent': disk_percent,
                    'free_gb': disk.free / (1024**3),
                    'total_gb': disk.total / (1024**3),
                    'status': self._get_status_level(disk_percent, 'disk')
                }
            }
        except Exception as e:
            logger.error(f"Failed to get system status: {e}")
            return {}

    def _get_status_level(self, value: float, resource_type: str) -> str:
        """Determine status level based on thresholds.

        Args:
            value: Usage percentage of the resource.
            resource_type: Threshold prefix, e.g. ``'cpu'``, ``'memory'`` or
                ``'disk'``.

        Returns:
            ``'critical'``, ``'warning'`` or ``'healthy'``. A resource type
            without configured thresholds is reported as ``'healthy'``.
        """
        critical = self.thresholds.get(f"{resource_type}_critical")
        warning = self.thresholds.get(f"{resource_type}_warning")
        if critical is not None and value >= critical:
            return 'critical'
        if warning is not None and value >= warning:
            return 'warning'
        return 'healthy'

    def get_api_metrics(self) -> Dict[str, Any]:
        """Get API metrics for the last hour from ``api_requests.jsonl``.

        Records that lack a parseable timestamp or any of the required fields
        are skipped individually.

        Returns:
            A dict with ``total_requests``, ``successful_requests``,
            ``success_rate``, ``average_execution_time``,
            ``p95_execution_time`` and ``endpoints`` (per endpoint: request
            ``count`` and ``success`` rate as a fraction). Empty when the log
            is missing, has no recent usable records, or reading failed.
        """
        try:
            api_log = self.log_dir / "api_requests.jsonl"
            if not api_log.exists():
                return {}

            one_hour_ago = datetime.now() - timedelta(hours=1)
            execution_times: List[float] = []
            successes: List[bool] = []
            # endpoint -> [request count, successful request count]
            endpoint_totals: Dict[str, List[int]] = {}

            for req in self._read_jsonl(api_log):
                stamp = self._parse_timestamp(req.get('timestamp'))
                if stamp is None or stamp <= one_hour_ago:
                    continue
                try:
                    duration = float(req['execution_time'])
                    succeeded = bool(req['response_data']['success'])
                    endpoint = str(req['endpoint'])
                except (KeyError, TypeError, ValueError):
                    continue

                execution_times.append(duration)
                successes.append(succeeded)
                tally = endpoint_totals.setdefault(endpoint, [0, 0])
                tally[0] += 1
                tally[1] += int(succeeded)

            total = len(execution_times)
            if not total:
                return {}

            successful = sum(successes)
            ordered_times = sorted(execution_times)
            return {
                'total_requests': total,
                'successful_requests': successful,
                'success_rate': successful / total,
                'average_execution_time': sum(execution_times) / total,
                # int(total * 0.95) is always < total, so the index is valid.
                'p95_execution_time': ordered_times[int(total * 0.95)],
                'endpoints': {
                    endpoint: {'count': count, 'success': ok / count}
                    for endpoint, (count, ok) in endpoint_totals.items()
                }
            }
        except Exception as e:
            logger.error(f"Failed to get API metrics: {e}")
            return {}

    def get_alerts(self) -> List[Dict[str, Any]]:
        """Get up to ten alerts from the last 24 hours, newest first.

        Alerts without a parseable ``timestamp`` are skipped.

        Returns:
            The alert dicts as stored in ``alerts.jsonl``; an empty list when
            the file is missing or reading failed.
        """
        try:
            alerts_file = self.log_dir / "alerts.jsonl"
            if not alerts_file.exists():
                return []

            one_day_ago = datetime.now() - timedelta(days=1)
            dated = []
            for alert in self._read_jsonl(alerts_file):
                stamp = self._parse_timestamp(alert.get('timestamp'))
                if stamp is not None and stamp > one_day_ago:
                    dated.append((stamp, alert))

            # Sort on the parsed time so mixed timestamp formats order correctly.
            dated.sort(key=lambda pair: pair[0], reverse=True)
            return [alert for _, alert in dated[:10]]
        except Exception as e:
            logger.error(f"Failed to get alerts: {e}")
            return []

    def _collect_state(self) -> Dict[str, Any]:
        """Gather one snapshot of system status, API metrics and alerts."""
        return {
            'timestamp': datetime.now().isoformat(),
            'system_status': self.get_system_status(),
            'api_metrics': self.get_api_metrics(),
            'alerts': self.get_alerts()
        }

    def display_dashboard(self, state: Optional[Dict[str, Any]] = None):
        """Display dashboard in terminal.

        Args:
            state: Snapshot shaped like the saved dashboard state (keys
                ``system_status``, ``api_metrics``, ``alerts``). When omitted
                a fresh snapshot is collected.
        """
        if state is None:
            state = self._collect_state()
        system_status = state.get('system_status') or {}
        api_metrics = state.get('api_metrics') or {}
        alerts = state.get('alerts') or []

        # Clear screen (ANSI escape code)
        print("\033[2J\033[H")
        print("=" * 60)
        print("🏗️ SHEIKH-KITTY MONITORING DASHBOARD")
        print("=" * 60)
        print(f"📅 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print()

        # System Status
        if system_status:
            print("🖥️ SYSTEM STATUS")
            print("-" * 20)
            cpu = system_status['cpu']
            status_icon = self._get_status_icon(cpu['status'])
            print(f"{status_icon} CPU: {cpu['usage_percent']:6.1f}% (Cores: {cpu['count']}, Load: {cpu['load_average']:.2f})")
            memory = system_status['memory']
            status_icon = self._get_status_icon(memory['status'])
            print(f"{status_icon} Memory: {memory['usage_percent']:6.1f}% (Available: {memory['available_gb']:.1f}GB)")
            disk = system_status['disk']
            status_icon = self._get_status_icon(disk['status'])
            print(f"{status_icon} Disk: {disk['usage_percent']:6.1f}% (Free: {disk['free_gb']:.1f}GB)")
            print()

        # API Metrics
        if api_metrics:
            print("🌐 API METRICS (Last Hour)")
            print("-" * 25)
            print(f"📊 Requests: {api_metrics['total_requests']}")
            print(f"✅ Success: {api_metrics['successful_requests']} ({api_metrics['success_rate']:.1%})")
            print(f"⏱️ Avg Time: {api_metrics['average_execution_time']:.3f}s")
            print(f"🚀 P95 Time: {api_metrics['p95_execution_time']:.3f}s")
            # Endpoint breakdown
            if api_metrics['endpoints']:
                print("🔗 Endpoints:")
                for endpoint, stats in api_metrics['endpoints'].items():
                    print(f" {endpoint}: {stats['count']} requests, {stats['success']:.1%} success")
            print()

        # Recent Alerts
        if alerts:
            print("🚨 RECENT ALERTS")
            print("-" * 15)
            for alert in alerts[:5]:  # Show the five newest alerts
                # Alerts come from a log file; tolerate missing fields.
                severity = str(alert.get('severity', 'unknown'))
                severity_icon = self._get_alert_icon(severity)
                print(f"{severity_icon} {severity.upper()}: {alert.get('message', '')}")
                print(f" 📅 {alert.get('timestamp', '')}")
            print()

        # Health Summary
        print("💚 SYSTEM HEALTH")
        print("-" * 15)
        health_score = self._calculate_health_score(system_status, api_metrics, alerts)
        health_status = self._get_health_status(health_score)
        print(f"Overall: {health_status} ({health_score:.1%})")
        print()
        print("Press Ctrl+C to exit")

    def _get_status_icon(self, status: str) -> str:
        """Get icon for a resource status; unknown statuses get a white dot."""
        icons = {
            'healthy': '🟢',
            'warning': '🟡',
            'critical': '🔴'
        }
        return icons.get(status, '⚪')

    def _get_alert_icon(self, severity: str) -> str:
        """Get icon for alert severity; unknown severities get a megaphone."""
        icons = {
            'info': 'ℹ️',
            'warning': '⚠️',
            'error': '❌',
            'critical': '🚨'
        }
        return icons.get(severity, '📢')

    def _calculate_health_score(self, system_status: Dict, api_metrics: Dict, alerts: List) -> float:
        """Calculate overall health score in the range 0.0 to 1.0.

        Starts at 1.0 and deducts:

        * 0.1 per resource in ``warning`` and 0.2 per resource in
          ``critical`` state (cpu, memory, disk);
        * the shortfall below a 95% API success rate;
        * 0.1 per critical alert, capped at 0.3.

        Missing sections or missing fields contribute no deduction.
        """
        score = 1.0

        # Deduct for system resource issues
        penalties = {'warning': 0.1, 'critical': 0.2}
        if system_status:
            for resource in ('cpu', 'memory', 'disk'):
                status = (system_status.get(resource) or {}).get('status')
                score -= penalties.get(status, 0.0)

        # Deduct for API issues
        if api_metrics:
            success_rate = api_metrics.get('success_rate', 1.0)
            if success_rate < 0.95:
                score -= (0.95 - success_rate)

        # Deduct for recent alerts
        recent_critical_alerts = sum(
            1 for alert in alerts
            if isinstance(alert, dict) and alert.get('severity') == 'critical'
        )
        if recent_critical_alerts > 0:
            score -= min(0.3, recent_critical_alerts * 0.1)

        return max(0.0, score)

    def _get_health_status(self, score: float) -> str:
        """Get health status text for a score between 0.0 and 1.0."""
        if score >= 0.9:
            return "Excellent"
        elif score >= 0.8:
            return "Good"
        elif score >= 0.7:
            return "Fair"
        elif score >= 0.5:
            return "Poor"
        else:
            return "Critical"

    def save_dashboard_state(self, state: Optional[Dict[str, Any]] = None):
        """Save dashboard state to ``dashboard_state.json`` in the log dir.

        Args:
            state: Snapshot to persist. When omitted a fresh snapshot is
                collected.

        Failures are logged and not raised.
        """
        try:
            if state is None:
                state = self._collect_state()
            # Write to a sibling file and rename it over the target so a
            # reader never sees a half-written state file.
            tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
            with open(tmp_file, 'w', encoding='utf-8') as f:
                json.dump(state, f, indent=2)
            tmp_file.replace(self.state_file)
        except Exception as e:
            logger.error(f"Failed to save dashboard state: {e}")

    def run_continuous_monitoring(self, update_interval: int = 30):
        """Run continuous dashboard monitoring until interrupted.

        Each cycle collects one snapshot, displays it, saves it, then sleeps.

        Args:
            update_interval: Seconds to sleep between cycles.
        """
        try:
            while True:
                # One snapshot per cycle: the saved state matches what was
                # shown and the blocking CPU sample runs only once.
                state = self._collect_state()
                self.display_dashboard(state)
                self.save_dashboard_state(state)
                time.sleep(update_interval)
        except KeyboardInterrupt:
            print("\n👋 Monitoring dashboard stopped")
        except Exception as e:
            logger.exception(f"Dashboard error: {e}")
def main():
    """Main dashboard execution.

    Command-line options:
        --interval N   Seconds between refreshes (default 30, must be >= 0).
        --once         Render the dashboard a single time and exit.
    """
    import argparse

    def non_negative_int(text: str) -> int:
        """argparse type: an integer >= 0, rejected at parse time otherwise."""
        value = int(text)
        if value < 0:
            # A negative interval would only fail later inside time.sleep().
            raise argparse.ArgumentTypeError(f"interval must be >= 0, got {value}")
        return value

    parser = argparse.ArgumentParser(description="Sheikh-Kitty Monitoring Dashboard")
    parser.add_argument('--interval', type=non_negative_int, default=30, help='Update interval in seconds')
    parser.add_argument('--once', action='store_true', help='Display once and exit')
    args = parser.parse_args()

    dashboard = SimpleDashboard()
    if args.once:
        dashboard.display_dashboard()
    else:
        print("Starting Sheikh-Kitty monitoring dashboard...")
        print("Press Ctrl+C to exit")
        dashboard.run_continuous_monitoring(args.interval)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()