Spaces:

likhonsheikh
/

sheikh-kitty

Sleeping

App Files Files Community

sheikh-kitty / monitoring /dashboard.py

likhonsheikh

Upload folder using huggingface_hub

0efaf6e verified 4 months ago

raw

history blame contribute delete

13.8 kB

	"""
	Sheikh-Kitty Monitoring Dashboard
	Real-time system monitoring and visualization

	Features:
	- System resource monitoring (CPU, memory, disk)
	- API performance metrics
	- Security alerts display
	- Execution history tracking
	- Health status indicators

	Author: MiniMax Agent
	Date: 2025-11-14
	"""

	import json
	import time
	import psutil
	from datetime import datetime, timedelta
	from pathlib import Path
	from typing import Dict, List, Any, Optional
	from dataclasses import asdict
	import logging

	# Module-level logger used by every error path in this file, followed by
	# the root logging setup (INFO and above) applied when the module is loaded.
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)


	class SimpleDashboard:
	    """Simple terminal-based dashboard for monitoring.

	    The dashboard samples host resources through ``psutil``, reads the
	    JSON-lines logs ``api_requests.jsonl`` and ``alerts.jsonl`` from
	    ``log_dir``, prints a text summary and can persist the collected
	    snapshot to ``dashboard_state.json`` in the same directory.
	    """

	    def __init__(self, log_dir: str = "logs"):
	        """Create the dashboard and make sure ``log_dir`` exists.

	        Args:
	            log_dir: Directory holding the log files and the saved state.
	        """
	        self.log_dir = Path(log_dir)
	        # parents=True so that a nested path such as "var/logs" works on
	        # the first run instead of raising FileNotFoundError.
	        self.log_dir.mkdir(parents=True, exist_ok=True)
	        self.state_file = self.log_dir / "dashboard_state.json"

	        # Usage percentages at which a resource becomes 'warning'/'critical'.
	        self.thresholds = {
	            'cpu_warning': 70.0,
	            'cpu_critical': 90.0,
	            'memory_warning': 75.0,
	            'memory_critical': 90.0,
	            'disk_warning': 80.0,
	            'disk_critical': 95.0
	        }

	    @staticmethod
	    def _read_jsonl(path: Path) -> List[Dict[str, Any]]:
	        """Return the JSON objects stored one per line in *path*.

	        Blank lines, lines that are not valid JSON and JSON values that
	        are not objects are skipped.
	        """
	        records = []
	        with open(path, 'r', encoding='utf-8', errors='replace') as f:
	            for line in f:
	                try:
	                    record = json.loads(line.strip())
	                except json.JSONDecodeError:
	                    continue
	                if isinstance(record, dict):
	                    records.append(record)
	        return records

	    @staticmethod
	    def _parse_timestamp(value: Any) -> Optional[datetime]:
	        """Parse an ISO-8601 string into a naive local ``datetime``.

	        Returns ``None`` when *value* is not a string or cannot be parsed,
	        so callers can skip the record instead of failing.
	        """
	        if not isinstance(value, str):
	            return None
	        text = value.strip()
	        # datetime.fromisoformat() only accepts a trailing "Z" from
	        # Python 3.11 on; rewrite it as an explicit UTC offset.
	        if text.endswith(('Z', 'z')):
	            text = text[:-1] + '+00:00'
	        try:
	            parsed = datetime.fromisoformat(text)
	            if parsed.tzinfo is not None:
	                # Aware and naive datetimes cannot be compared, and the
	                # cut-offs are built from the naive datetime.now().
	                parsed = parsed.astimezone().replace(tzinfo=None)
	        except (ValueError, OverflowError, OSError):
	            return None
	        return parsed

	    def _collect_state(self) -> Dict[str, Any]:
	        """Collect one snapshot of system status, API metrics and alerts."""
	        return {
	            'timestamp': datetime.now().isoformat(),
	            'system_status': self.get_system_status(),
	            'api_metrics': self.get_api_metrics(),
	            'alerts': self.get_alerts()
	        }

	    def get_system_status(self) -> Dict[str, Any]:
	        """Get current system status.

	        Returns:
	            A dict with ``timestamp``, ``cpu``, ``memory`` and ``disk``
	            entries, or ``{}`` when sampling fails (the error is logged).
	            The call blocks for about one second while CPU usage is
	            measured.
	        """
	        try:
	            # CPU usage (interval=1 blocks for one second to measure it)
	            cpu_percent = psutil.cpu_percent(interval=1)
	            cpu_count = psutil.cpu_count()

	            # Memory usage
	            memory = psutil.virtual_memory()

	            # Disk usage, computed once and reused for value and status
	            disk = psutil.disk_usage('/')
	            disk_percent = (disk.used / disk.total) * 100 if disk.total else 0.0

	            # Load average: the function may be missing (AttributeError)
	            # or unsupported on the platform (OSError).
	            try:
	                load_avg = psutil.getloadavg()[0]
	            except (AttributeError, OSError):
	                load_avg = 0.0

	            return {
	                'timestamp': datetime.now().isoformat(),
	                'cpu': {
	                    'usage_percent': cpu_percent,
	                    'count': cpu_count,
	                    'load_average': load_avg,
	                    'status': self._get_status_level(cpu_percent, 'cpu')
	                },
	                'memory': {
	                    'usage_percent': memory.percent,
	                    'available_gb': memory.available / (1024**3),
	                    'total_gb': memory.total / (1024**3),
	                    'status': self._get_status_level(memory.percent, 'memory')
	                },
	                'disk': {
	                    'usage_percent': disk_percent,
	                    'free_gb': disk.free / (1024**3),
	                    'total_gb': disk.total / (1024**3),
	                    'status': self._get_status_level(disk_percent, 'disk')
	                }
	            }
	        except Exception as e:
	            logger.error(f"Failed to get system status: {e}")
	            return {}

	    def _get_status_level(self, value: float, resource_type: str) -> str:
	        """Determine status level based on thresholds.

	        Looks up ``<resource_type>_critical`` and ``<resource_type>_warning``
	        in ``self.thresholds``; a resource without thresholds is 'healthy'.
	        """
	        critical = self.thresholds.get(f'{resource_type}_critical')
	        warning = self.thresholds.get(f'{resource_type}_warning')

	        if critical is not None and value >= critical:
	            return 'critical'
	        if warning is not None and value >= warning:
	            return 'warning'
	        return 'healthy'

	    def get_api_metrics(self) -> Dict[str, Any]:
	        """Get API metrics for the last hour from ``api_requests.jsonl``.

	        Records that lack a parsable ``timestamp``, a numeric
	        ``execution_time``, a string ``endpoint`` or a
	        ``response_data.success`` value are skipped, so one malformed
	        line does not discard the whole hour.

	        Returns:
	            ``{}`` when the log is missing, has no usable recent record or
	            cannot be read; otherwise a dict with ``total_requests``,
	            ``successful_requests``, ``success_rate``,
	            ``average_execution_time``, ``p95_execution_time`` and
	            ``endpoints`` (per endpoint: ``count`` and ``success`` rate).
	        """
	        try:
	            api_log = self.log_dir / "api_requests.jsonl"
	            if not api_log.exists():
	                return {}

	            one_hour_ago = datetime.now() - timedelta(hours=1)
	            execution_times = []
	            successful = 0
	            # endpoint -> [request count, successful request count]
	            per_endpoint: Dict[str, List[int]] = {}

	            for req in self._read_jsonl(api_log):
	                timestamp = self._parse_timestamp(req.get('timestamp'))
	                if timestamp is None or timestamp <= one_hour_ago:
	                    continue

	                execution_time = req.get('execution_time')
	                endpoint = req.get('endpoint')
	                response_data = req.get('response_data')
	                if (
	                    isinstance(execution_time, bool)
	                    or not isinstance(execution_time, (int, float))
	                    or not isinstance(endpoint, str)
	                    or not isinstance(response_data, dict)
	                    or 'success' not in response_data
	                ):
	                    continue

	                succeeded = bool(response_data['success'])
	                execution_times.append(execution_time)
	                successful += succeeded
	                counters = per_endpoint.setdefault(endpoint, [0, 0])
	                counters[0] += 1
	                counters[1] += succeeded

	            total = len(execution_times)
	            if not total:
	                return {}

	            # Nearest-rank percentile: the smallest value with at least 95%
	            # of the samples at or below it (ceil(0.95 * n) as an integer).
	            p95_index = (95 * total + 99) // 100 - 1

	            return {
	                'total_requests': total,
	                'successful_requests': successful,
	                'success_rate': successful / total,
	                'average_execution_time': sum(execution_times) / total,
	                'p95_execution_time': sorted(execution_times)[p95_index],
	                'endpoints': {
	                    endpoint: {
	                        'count': count,
	                        'success': succeeded_count / count
	                    }
	                    for endpoint, (count, succeeded_count) in per_endpoint.items()
	                }
	            }

	        except Exception as e:
	            logger.error(f"Failed to get API metrics: {e}")
	            return {}

	    def get_alerts(self) -> List[Dict[str, Any]]:
	        """Get the ten most recent alerts of the last 24 hours.

	        Alerts without a parsable ``timestamp`` are skipped. The result is
	        ordered newest first by the parsed time; ``[]`` is returned when
	        the file is missing or cannot be read.
	        """
	        try:
	            alerts_file = self.log_dir / "alerts.jsonl"
	            if not alerts_file.exists():
	                return []

	            one_day_ago = datetime.now() - timedelta(days=1)
	            recent = []
	            for alert in self._read_jsonl(alerts_file):
	                timestamp = self._parse_timestamp(alert.get('timestamp'))
	                if timestamp is not None and timestamp > one_day_ago:
	                    recent.append((timestamp, alert))

	            # Sort on the parsed time only; alert dicts are not orderable.
	            recent.sort(key=lambda pair: pair[0], reverse=True)
	            return [alert for _, alert in recent[:10]]

	        except Exception as e:
	            logger.error(f"Failed to get alerts: {e}")
	            return []

	    def display_dashboard(self, state: Optional[Dict[str, Any]] = None):
	        """Display dashboard in terminal.

	        Args:
	            state: Optional snapshot with ``system_status``,
	                ``api_metrics`` and ``alerts`` entries. Any entry that is
	                absent (or the whole snapshot, when ``None``) is collected
	                on the spot, which is the behaviour of a plain
	                ``display_dashboard()`` call.
	        """
	        snapshot = state if state is not None else {}

	        # Clear screen (ANSI escape code)
	        print("\033[2J\033[H")

	        print("=" * 60)
	        print("🏗️ SHEIKH-KITTY MONITORING DASHBOARD")
	        print("=" * 60)
	        print(f"📅 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	        print()

	        # System Status
	        if 'system_status' in snapshot:
	            system_status = snapshot['system_status']
	        else:
	            system_status = self.get_system_status()
	        if system_status:
	            print("🖥️ SYSTEM STATUS")
	            print("-" * 20)

	            # CPU
	            cpu = system_status['cpu']
	            status_icon = self._get_status_icon(cpu['status'])
	            print(f"{status_icon} CPU: {cpu['usage_percent']:6.1f}% (Cores: {cpu['count']}, Load: {cpu['load_average']:.2f})")

	            # Memory
	            memory = system_status['memory']
	            status_icon = self._get_status_icon(memory['status'])
	            print(f"{status_icon} Memory: {memory['usage_percent']:6.1f}% (Available: {memory['available_gb']:.1f}GB)")

	            # Disk
	            disk = system_status['disk']
	            status_icon = self._get_status_icon(disk['status'])
	            print(f"{status_icon} Disk: {disk['usage_percent']:6.1f}% (Free: {disk['free_gb']:.1f}GB)")
	            print()

	        # API Metrics
	        if 'api_metrics' in snapshot:
	            api_metrics = snapshot['api_metrics']
	        else:
	            api_metrics = self.get_api_metrics()
	        if api_metrics:
	            print("🌐 API METRICS (Last Hour)")
	            print("-" * 25)
	            print(f"📊 Requests: {api_metrics['total_requests']}")
	            print(f"✅ Success: {api_metrics['successful_requests']} ({api_metrics['success_rate']:.1%})")
	            print(f"⏱️ Avg Time: {api_metrics['average_execution_time']:.3f}s")
	            print(f"🚀 P95 Time: {api_metrics['p95_execution_time']:.3f}s")

	            # Endpoint breakdown
	            if api_metrics['endpoints']:
	                print("🔗 Endpoints:")
	                for endpoint, stats in api_metrics['endpoints'].items():
	                    print(f" {endpoint}: {stats['count']} requests, {stats['success']:.1%} success")
	            print()

	        # Recent Alerts
	        if 'alerts' in snapshot:
	            alerts = snapshot['alerts']
	        else:
	            alerts = self.get_alerts()
	        if alerts:
	            print("🚨 RECENT ALERTS")
	            print("-" * 15)
	            for alert in alerts[:5]:  # Show last 5 alerts
	                # .get(): an alert missing a field must not abort rendering
	                severity = str(alert.get('severity', 'unknown'))
	                severity_icon = self._get_alert_icon(severity)
	                print(f"{severity_icon} {severity.upper()}: {alert.get('message', '')}")
	                print(f" 📅 {alert.get('timestamp', '')}")
	            print()

	        # Health Summary
	        print("💚 SYSTEM HEALTH")
	        print("-" * 15)
	        health_score = self._calculate_health_score(system_status, api_metrics, alerts)
	        health_status = self._get_health_status(health_score)
	        print(f"Overall: {health_status} ({health_score:.1%})")
	        print()

	        print("Press Ctrl+C to exit")

	    def _get_status_icon(self, status: str) -> str:
	        """Get icon for a resource status; unknown statuses get '⚪'."""
	        icons = {
	            'healthy': '🟢',
	            'warning': '🟡',
	            'critical': '🔴'
	        }
	        return icons.get(status, '⚪')

	    def _get_alert_icon(self, severity: str) -> str:
	        """Get icon for alert severity; unknown severities get '📢'."""
	        icons = {
	            'info': 'ℹ️',
	            'warning': '⚠️',
	            'error': '❌',
	            'critical': '🚨'
	        }
	        return icons.get(severity, '📢')

	    def _calculate_health_score(self, system_status: Dict, api_metrics: Dict, alerts: List) -> float:
	        """Calculate overall health score in the range 0.0 to 1.0.

	        Starts at 1.0 and deducts 0.1 per resource in 'warning', 0.2 per
	        resource in 'critical', the shortfall of the API success rate
	        below 0.95, and 0.1 per critical alert (capped at 0.3).
	        """
	        score = 1.0

	        # Deduct for system resource issues
	        penalties = {'warning': 0.1, 'critical': 0.2}
	        if system_status:
	            for resource in ('cpu', 'memory', 'disk'):
	                status = system_status.get(resource, {}).get('status')
	                score -= penalties.get(status, 0.0)

	        # Deduct for API issues
	        if api_metrics:
	            success_rate = api_metrics.get('success_rate', 1.0)
	            if success_rate < 0.95:
	                score -= (0.95 - success_rate)

	        # Deduct for recent alerts (alerts without a severity are ignored)
	        recent_critical_alerts = sum(
	            1 for alert in (alerts or []) if alert.get('severity') == 'critical'
	        )
	        if recent_critical_alerts > 0:
	            score -= min(0.3, recent_critical_alerts * 0.1)

	        return max(0.0, score)

	    def _get_health_status(self, score: float) -> str:
	        """Get health status text for a score."""
	        bands = (
	            (0.9, "Excellent"),
	            (0.8, "Good"),
	            (0.7, "Fair"),
	            (0.5, "Poor"),
	        )
	        for floor, label in bands:
	            if score >= floor:
	                return label
	        return "Critical"

	    def save_dashboard_state(self, state: Optional[Dict[str, Any]] = None):
	        """Save dashboard state to ``self.state_file`` as JSON.

	        Args:
	            state: Snapshot to write. When ``None`` a fresh snapshot is
	                collected first.

	        Failures are logged, not raised.
	        """
	        try:
	            if state is None:
	                state = self._collect_state()

	            with open(self.state_file, 'w', encoding='utf-8') as f:
	                json.dump(state, f, indent=2)

	        except Exception as e:
	            logger.error(f"Failed to save dashboard state: {e}")

	    def run_continuous_monitoring(self, update_interval: int = 30):
	        """Run continuous dashboard monitoring until interrupted.

	        Each cycle collects one snapshot and uses it for both the display
	        and the saved state, so the data (including the one-second CPU
	        sample) is gathered once per cycle and the file matches the screen.

	        Args:
	            update_interval: Seconds to sleep between cycles; negative
	                values are treated as 0.
	        """
	        try:
	            while True:
	                state = self._collect_state()
	                self.display_dashboard(state)
	                self.save_dashboard_state(state)
	                # time.sleep() raises ValueError for negative lengths
	                time.sleep(max(0, update_interval))
	        except KeyboardInterrupt:
	            print("\n👋 Monitoring dashboard stopped")
	        except Exception as e:
	            logger.error(f"Dashboard error: {e}")


	def main():
	    """Parse command-line options and run the dashboard.

	    Options:
	        --interval N  Seconds between refreshes (non-negative, default 30).
	        --once        Render the dashboard a single time and exit.
	    """
	    import argparse

	    def non_negative_int(text: str) -> int:
	        """argparse type: an integer >= 0 (time.sleep rejects negatives)."""
	        value = int(text)  # ValueError is reported by argparse as invalid
	        if value < 0:
	            raise argparse.ArgumentTypeError(
	                f"interval must be non-negative, got {value}"
	            )
	        return value

	    parser = argparse.ArgumentParser(description="Sheikh-Kitty Monitoring Dashboard")
	    parser.add_argument('--interval', type=non_negative_int, default=30, help='Update interval in seconds')
	    parser.add_argument('--once', action='store_true', help='Display once and exit')

	    args = parser.parse_args()

	    dashboard = SimpleDashboard()

	    if args.once:
	        dashboard.display_dashboard()
	    else:
	        print("Starting Sheikh-Kitty monitoring dashboard...")
	        print("Press Ctrl+C to exit")
	        dashboard.run_continuous_monitoring(args.interval)


	# Run the dashboard only when this file is executed as a script.
	if __name__ == "__main__":
	    main()