Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Enhanced Production Monitoring Dashboard with Historical Data | |
| Real-time monitoring with historical trends and analytics | |
| """ | |
| import json | |
| import os | |
| import sys | |
| import time | |
| import urllib.parse | |
| import urllib.request | |
| from datetime import datetime | |
| from http.server import BaseHTTPRequestHandler, HTTPServer | |
| # Import metrics database | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | |
| from metrics_database import metrics_db | |
# Alert system for frontend errors.
# In-memory list of alert dicts; entries are appended by the error-report
# handler further down in this file.
alert_history = list()

# Alerting switch plus warning/critical levels per resource.
# NOTE(review): the thresholds are presumably percent utilisation; nothing in
# the visible code reads them, so confirm against the consumer.
alert_config = dict(
    enabled=True,
    thresholds=dict(
        cpu_warning=70,
        cpu_critical=90,
        memory_warning=80,
        memory_critical=95,
        disk_warning=85,
        disk_critical=95,
    ),
)
def send_console_alert(alert):
    """Print a human-readable alert banner to stdout.

    Args:
        alert: Mapping with the required keys ``type``, ``severity`` and
            ``message`` (``type`` and ``severity`` must be strings because
            they are upper-cased). ``value`` is printed whenever the key is
            present; ``context`` only when present and truthy.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # NOTE(review): the emoji literals below look mis-encoded (mojibake);
    # they are kept byte-for-byte here - restore them from the original file.
    severity_emoji = {"warning": "β οΈ", "critical": "π¨"}
    emoji = severity_emoji.get(alert["severity"], "π’")
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # A real newline: the previous "\\n" printed the two characters
    # backslash + n in front of every banner instead of a blank line.
    print(f"\n{emoji} PRODUCTION ALERT - {timestamp}")
    print(f" Type: {alert['type'].upper()}")
    print(f" Severity: {alert['severity'].upper()}")
    print(f" Message: {alert['message']}")
    if "value" in alert:
        print(f" Value: {alert['value']}")
    if alert.get("context"):
        print(f" Context: {alert['context']}")
    print("-" * 50)
class EnhancedMonitoringHandler(BaseHTTPRequestHandler):
    """Serve the monitoring dashboard page and the JSON API behind it.

    GET routes (matched on the path component only, so a query string such
    as a cache-buster does not turn a valid route into a 404):

    * ``/``                 - dashboard HTML
    * ``/api/metrics``      - current system metrics
    * ``/api/health``       - health/uptime of this server
    * ``/api/environments`` - blue/green environment probes
    * ``/api/historical``   - last 2 hours of stored metrics
    * ``/api/summary``      - 24 hour summary

    Anything else gets a 404. Every JSON payload is built *before* the
    status line is written, so a failing collector no longer goes out under
    a ``200 OK`` with an empty body.
    """

    # Request path -> name of the method that serves it. Names (not bound
    # methods) so that subclasses overriding a serve_* method are honoured.
    _ROUTES = {
        "/": "serve_dashboard",
        "/api/metrics": "serve_metrics",
        "/api/health": "serve_health",
        "/api/environments": "serve_environments",
        "/api/historical": "serve_historical_data",
        "/api/summary": "serve_metrics_summary",
    }

    def do_GET(self):
        """Dispatch a GET request to the matching ``serve_*`` method."""
        route = urllib.parse.urlsplit(self.path).path
        getattr(self, self._ROUTES.get(route, "send_404"))()

    def _send_body(self, status, content_type, body):
        """Write a complete response: status line, headers, then *body* (bytes)."""
        self.send_response(status)
        self.send_header("Content-type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_json(self, payload, status=200):
        """Serialize *payload* as indented JSON and send it."""
        body = json.dumps(payload, indent=2).encode()
        self._send_body(status, "application/json", body)

    def serve_dashboard(self):
        """Send the single-page dashboard (HTML, CSS and JS inline)."""
        # NOTE(review): the emoji in the headings look mis-encoded (mojibake);
        # they are kept as found - restore them from the original file.
        html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Enhanced Production Monitoring Dashboard</title>
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
            color: white; min-height: 100vh;
        }
        .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
        .header { text-align: center; margin-bottom: 40px; }
        .header h1 { font-size: 2.5rem; margin-bottom: 10px; }
        .header p { font-size: 1.1rem; opacity: 0.8; }
        .section-title { font-size: 1.4rem; margin: 30px 0 20px 0; color: #ffd700; }
        .metrics-grid {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px; margin-bottom: 40px;
        }
        .metric-card {
            background: rgba(255, 255, 255, 0.1); backdrop-filter: blur(10px);
            border-radius: 15px; padding: 25px; border: 1px solid rgba(255, 255, 255, 0.2);
            transition: transform 0.3s ease;
        }
        .metric-card:hover { transform: translateY(-5px); }
        .metric-title { font-size: 1.2rem; margin-bottom: 15px; color: #64b5f6; }
        .metric-value { font-size: 2rem; font-weight: bold; margin-bottom: 5px; }
        .metric-label { font-size: 0.9rem; opacity: 0.7; }
        .env-card {
            background: rgba(255, 255, 255, 0.15); backdrop-filter: blur(10px);
            border-radius: 15px; padding: 20px; margin-bottom: 20px;
            border: 2px solid rgba(255, 255, 255, 0.3);
        }
        .env-blue { border-color: #4fc3f7; }
        .env-green { border-color: #66bb6a; }
        .env-status { display: flex; align-items: center; margin-bottom: 10px; }
        .env-indicator { width: 16px; height: 16px; border-radius: 50%; margin-right: 10px; }
        .status-healthy { background: #4caf50; }
        .status-unhealthy { background: #f44336; }
        .status-unknown { background: #ff9800; }
        .chart-container {
            background: rgba(255, 255, 255, 0.1); backdrop-filter: blur(10px);
            border-radius: 15px; padding: 25px; margin-bottom: 20px;
            border: 1px solid rgba(255, 255, 255, 0.2);
        }
        .refresh-btn {
            background: #64b5f6; color: white; border: none; padding: 10px 20px;
            border-radius: 25px; cursor: pointer; font-size: 1rem;
            transition: background 0.3s ease;
        }
        .refresh-btn:hover { background: #42a5f5; }
        .last-updated { text-align: center; opacity: 0.7; margin-top: 20px; }
        .two-column { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
        .summary-card {
            background: rgba(255, 255, 255, 0.1); backdrop-filter: blur(10px);
            border-radius: 15px; padding: 20px; margin-bottom: 20px;
            border: 1px solid rgba(255, 255, 255, 0.2);
        }
        .summary-stat { font-size: 1.1rem; margin: 5px 0; }
        .trend-up { color: #4caf50; }
        .trend-down { color: #f44336; }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>π Enhanced Production Monitoring</h1>
            <p>Real-time monitoring with historical data and analytics</p>
            <button class="refresh-btn" onclick="refreshData()">π Refresh Data</button>
        </div>
        <div class="section-title">π» System Resources</div>
        <div class="metrics-grid">
            <div class="metric-card">
                <div class="metric-title">π₯οΈ CPU Usage</div>
                <div class="metric-value" id="cpu-value">--%</div>
                <div class="metric-label">Processing load</div>
            </div>
            <div class="metric-card">
                <div class="metric-title">π§ Memory Usage</div>
                <div class="metric-value" id="memory-value">--%</div>
                <div class="metric-label">Memory consumption</div>
            </div>
            <div class="metric-card">
                <div class="metric-title">πΎ Disk Usage</div>
                <div class="metric-value" id="disk-value">--%</div>
                <div class="metric-label">Storage utilization</div>
            </div>
            <div class="metric-card">
                <div class="metric-title">β±οΈ Uptime</div>
                <div class="metric-value" id="uptime-value">--</div>
                <div class="metric-label">System running time</div>
            </div>
        </div>
        <div class="section-title">π Deployment Environments</div>
        <div class="two-column">
            <div class="env-card env-blue">
                <div style="font-size: 1.3rem; margin-bottom: 15px;">π΅ Blue Environment</div>
                <div class="env-status">
                    <div id="blue-status-indicator" class="env-indicator status-unknown"></div>
                    <span id="blue-status-text">Checking...</span>
                </div>
                <div style="font-size: 1.1rem;" id="blue-response-time">Response Time: --ms</div>
                <div style="font-size: 1.1rem;" id="blue-availability">24h Availability: --%</div>
            </div>
            <div class="env-card env-green">
                <div style="font-size: 1.3rem; margin-bottom: 15px;">π’ Green Environment</div>
                <div class="env-status">
                    <div id="green-status-indicator" class="env-indicator status-unknown"></div>
                    <span id="green-status-text">Checking...</span>
                </div>
                <div style="font-size: 1.1rem;" id="green-response-time">Response Time: --ms</div>
                <div style="font-size: 1.1rem;" id="green-availability">24h Availability: --%</div>
            </div>
        </div>
        <div class="section-title">π Historical Summary (Last 24 Hours)</div>
        <div class="summary-card">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
                <div>
                    <div style="font-size: 1.2rem; margin-bottom: 10px;">π System Performance</div>
                    <div class="summary-stat" id="avg-cpu">Avg CPU: --%</div>
                    <div class="summary-stat" id="max-cpu">Peak CPU: --%</div>
                    <div class="summary-stat" id="avg-memory">Avg Memory: --%</div>
                    <div class="summary-stat" id="max-memory">Peak Memory: --%</div>
                </div>
                <div>
                    <div style="font-size: 1.2rem; margin-bottom: 10px;">π Environment Health</div>
                    <div class="summary-stat" id="blue-availability-summary">Blue Availability: --%</div>
                    <div class="summary-stat" id="green-availability-summary">Green Availability: --%</div>
                    <div class="summary-stat" id="total-readings">Total Readings: --</div>
                </div>
                <div>
                    <div style="font-size: 1.2rem; margin-bottom: 10px;">πΎ Database Health</div>
                    <div class="summary-stat" id="db-availability">DB Availability: --%</div>
                    <div class="summary-stat" id="db-checks">Health Checks: --</div>
                    <div class="summary-stat" id="data-points">Stored Data Points: --</div>
                </div>
            </div>
        </div>
        <div class="chart-container">
            <div class="chart-title">π Performance Trends (Last 2 Hours)</div>
            <canvas id="performance-canvas" style="width: 100%; height: 200px;"></canvas>
        </div>
        <div class="last-updated" id="last-updated">
            Last updated: Loading...
        </div>
    </div>
    <script>
        let metricsData = [];
        let historicalData = [];

        async function fetchEndpoint(endpoint) {
            try {
                const response = await fetch(endpoint);
                // Treat HTTP errors like network errors instead of parsing
                // an error body as if it were data.
                if (!response.ok) throw new Error('HTTP ' + response.status);
                return await response.json();
            } catch (error) {
                console.error('Error fetching', endpoint, ':', error);
                return null;
            }
        }

        // A missing field used to render as "undefined%": undefined + '%'
        // is a truthy string, so the old "|| '--%'" fallback never fired.
        function formatPercent(value) {
            return typeof value === 'number' ? value.toFixed(1) + '%' : '--%';
        }

        function updateDashboard(data) {
            if (!data) return;
            document.getElementById('cpu-value').textContent = formatPercent(data.cpu_percent);
            document.getElementById('memory-value').textContent = formatPercent(data.memory_percent);
            document.getElementById('disk-value').textContent = formatPercent(data.disk_percent);
            document.getElementById('uptime-value').textContent = data.uptime || '--';
            document.getElementById('last-updated').textContent =
                'Last updated: ' + new Date().toLocaleTimeString();
        }

        function updateEnvironments(data) {
            if (!data) return;
            const blueHealthy = data.blue_environment?.healthy || false;
            const blueTime = data.blue_environment?.response_time || 0;
            document.getElementById('blue-status-indicator').className =
                'env-indicator ' + (blueHealthy ? 'status-healthy' : 'status-unhealthy');
            document.getElementById('blue-status-text').textContent =
                blueHealthy ? 'Healthy' : 'Unhealthy';
            document.getElementById('blue-response-time').textContent =
                'Response Time: ' + blueTime + 'ms';
            const greenHealthy = data.green_environment?.healthy || false;
            const greenTime = data.green_environment?.response_time || 0;
            document.getElementById('green-status-indicator').className =
                'env-indicator ' + (greenHealthy ? 'status-healthy' : 'status-unhealthy');
            document.getElementById('green-status-text').textContent =
                greenHealthy ? 'Healthy' : 'Unhealthy';
            document.getElementById('green-response-time').textContent =
                'Response Time: ' + greenTime + 'ms';
        }

        function updateSummary(data) {
            if (!data) return;
            const system = data.system_metrics || {};
            document.getElementById('avg-cpu').textContent = 'Avg CPU: ' + (system.avg_cpu?.toFixed(1) || '--') + '%';
            document.getElementById('max-cpu').textContent = 'Peak CPU: ' + (system.max_cpu?.toFixed(1) || '--') + '%';
            document.getElementById('avg-memory').textContent = 'Avg Memory: ' + (system.avg_memory?.toFixed(1) || '--') + '%';
            document.getElementById('max-memory').textContent = 'Peak Memory: ' + (system.max_memory?.toFixed(1) || '--') + '%';
            const envAvailability = data.environment_availability || [];
            const blueEnv = envAvailability.find(e => e.environment === 'blue') || {};
            const greenEnv = envAvailability.find(e => e.environment === 'green') || {};
            // "??" rather than "||": 0 is a legitimate value (an environment
            // that was down all day) and must not be shown as "--".
            document.getElementById('blue-availability').textContent = '24h Availability: ' + (blueEnv.availability_percent ?? '--') + '%';
            document.getElementById('green-availability').textContent = '24h Availability: ' + (greenEnv.availability_percent ?? '--') + '%';
            document.getElementById('blue-availability-summary').textContent = 'Blue Availability: ' + (blueEnv.availability_percent ?? '--') + '%';
            document.getElementById('green-availability-summary').textContent = 'Green Availability: ' + (greenEnv.availability_percent ?? '--') + '%';
            document.getElementById('total-readings').textContent = 'Total Readings: ' + (blueEnv.total_checks || 0) + ' (Blue) + ' + (greenEnv.total_checks || 0) + ' (Green)';
            const db = data.database_health || {};
            document.getElementById('db-availability').textContent = 'DB Availability: ' + (db.db_availability_percent ?? '--') + '%';
            document.getElementById('db-checks').textContent = 'Health Checks: ' + (db.total_checks ?? '--');
            document.getElementById('data-points').textContent = 'Stored Data Points: ' + ((system.total_readings || 0) + (db.total_checks || 0) + (blueEnv.total_checks || 0) + (greenEnv.total_checks || 0));
        }

        function drawChart() {
            const canvas = document.getElementById('performance-canvas');
            const ctx = canvas.getContext('2d');
            canvas.width = canvas.offsetWidth;
            canvas.height = canvas.offsetHeight;
            if (historicalData.length < 2) return;
            const width = canvas.width;
            const height = canvas.height;
            const padding = 20;
            ctx.clearRect(0, 0, width, height);
            ctx.strokeStyle = 'rgba(255, 255, 255, 0.1)';
            ctx.lineWidth = 1;
            for (let i = 0; i <= 4; i++) {
                const y = padding + (height - 2 * padding) * i / 4;
                ctx.beginPath();
                ctx.moveTo(padding, y);
                ctx.lineTo(width - padding, y);
                ctx.stroke();
            }
            // Draw CPU line
            ctx.strokeStyle = '#ff6b6b';
            ctx.lineWidth = 2;
            ctx.beginPath();
            for (let i = 0; i < historicalData.length; i++) {
                const x = padding + (width - 2 * padding) * i / (historicalData.length - 1);
                const y = height - padding - (height - 2 * padding) * (historicalData[i].cpu_percent || 0) / 100;
                if (i === 0) ctx.moveTo(x, y);
                else ctx.lineTo(x, y);
            }
            ctx.stroke();
            // Draw Memory line
            ctx.strokeStyle = '#4ecdc4';
            ctx.lineWidth = 2;
            ctx.beginPath();
            for (let i = 0; i < historicalData.length; i++) {
                const x = padding + (width - 2 * padding) * i / (historicalData.length - 1);
                const y = height - padding - (height - 2 * padding) * (historicalData[i].memory_percent || 0) / 100;
                if (i === 0) ctx.moveTo(x, y);
                else ctx.lineTo(x, y);
            }
            ctx.stroke();
        }

        async function refreshData() {
            const [metrics, environments, historical, summary] = await Promise.all([
                fetchEndpoint('/api/metrics'),
                fetchEndpoint('/api/environments'),
                fetchEndpoint('/api/historical'),
                fetchEndpoint('/api/summary')
            ]);
            updateDashboard(metrics);
            updateEnvironments(environments);
            updateSummary(summary);
            if (historical && historical.system_metrics) {
                historicalData = historical.system_metrics.slice(0, 50); // Last 50 readings
                drawChart();
            }
        }

        setInterval(refreshData, 10000); // Refresh every 10 seconds
        refreshData();
    </script>
</body>
</html>
"""
        # The page contains non-ASCII text, so declare the encoding that is
        # actually used for the bytes on the wire.
        self._send_body(200, "text/html; charset=utf-8", html.encode("utf-8"))

    def serve_metrics(self):
        """Send the current system metrics as JSON."""
        self._send_json(collect_system_metrics())

    def serve_health(self):
        """Send this server's own health record (status, version, uptime in seconds)."""
        self._send_json(
            {
                "status": "healthy",
                "timestamp": datetime.now().isoformat(),
                "version": "2.0.0",
                "uptime": time.time() - start_time,
            }
        )

    def serve_environments(self):
        """Send the blue/green environment health probes as JSON."""
        self._send_json(collect_environment_metrics())

    def serve_historical_data(self):
        """Send the stored metrics of the last 2 hours as JSON."""
        self._send_json(
            {
                "system_metrics": metrics_db.get_system_metrics(hours=2),  # Last 2 hours
                "blue_environment": metrics_db.get_environment_metrics("blue", hours=2),
                "green_environment": metrics_db.get_environment_metrics("green", hours=2),
                "database_metrics": metrics_db.get_database_metrics(hours=2),
            }
        )

    def serve_metrics_summary(self):
        """Send the metrics summary of the last 24 hours as JSON."""
        self._send_json(metrics_db.get_metrics_summary(hours=24))  # Last 24 hours

    def send_404(self):
        """Send a plain ``404 Not Found`` response."""
        self._send_body(404, "text/html", b"404 Not Found")

    def log_message(self, format, *args):
        """Silence the base class's per-request logging to stderr."""
        # The parameter name ``format`` is dictated by BaseHTTPRequestHandler.
def _format_uptime(seconds):
    """Render an elapsed duration as ``HH:MM:SS`` without wrapping at 24 hours."""
    total = max(int(seconds), 0)
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def collect_system_metrics():
    """Collect a snapshot of host resource usage.

    Uses ``psutil`` when it is importable (the CPU reading blocks for one
    second). When it is not installed, random placeholder values are
    returned instead so the dashboard still renders.

    Returns:
        dict with the keys ``cpu_percent``, ``memory_percent``,
        ``memory_available_gb``, ``disk_percent``, ``disk_free_gb``,
        ``uptime`` (``HH:MM:SS`` since the module-level ``start_time``),
        ``timestamp`` (ISO 8601, local time) and ``process_count``.
    """
    try:
        import psutil
    except ImportError:
        psutil = None

    if psutil is not None:
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")
        memory_percent = memory.percent
        memory_available_gb = memory.available / (1024**3)
        disk_percent = disk.percent
        disk_free_gb = disk.free / (1024**3)
        process_count = len(psutil.pids())
    else:
        import random

        cpu_percent = random.uniform(20, 80)
        memory_percent = random.uniform(30, 70)
        disk_percent = random.uniform(40, 60)
        memory_available_gb = random.uniform(2, 8)
        disk_free_gb = random.uniform(10, 50)
        process_count = random.randint(100, 300)

    return {
        "cpu_percent": cpu_percent,
        "memory_percent": memory_percent,
        "memory_available_gb": memory_available_gb,
        "disk_percent": disk_percent,
        "disk_free_gb": disk_free_gb,
        # strftime("%H:%M:%S", gmtime(...)) silently wrapped back to
        # 00:00:00 once the server had been up for a full day.
        "uptime": _format_uptime(time.time() - start_time),
        "timestamp": datetime.now().isoformat(),
        "process_count": process_count,
    }
def collect_environment_metrics():
    """Probe the blue and green environments and bundle the results.

    Returns:
        dict with ``blue_environment`` (localhost:5002) and
        ``green_environment`` (localhost:5003), each holding the result of
        ``check_environment_health``, plus a ``timestamp`` (ISO 8601) taken
        after both probes have finished.
    """
    # (result key, port) for each deployment slot, probed in this order.
    targets = (
        ("blue_environment", 5002),
        ("green_environment", 5003),
    )
    report = {key: check_environment_health("localhost", port) for key, port in targets}
    report["timestamp"] = datetime.now().isoformat()
    return report
def check_environment_health(host, port, timeout=5):
    """Probe ``http://{host}:{port}/api/health`` and classify the result.

    Args:
        host: Host name or address of the environment.
        port: TCP port of the environment.
        timeout: Seconds to wait for the probe (default 5, as before).

    Returns:
        dict with ``healthy`` (bool), ``response_time`` (int milliseconds),
        ``status``, ``endpoint`` and ``last_check`` (ISO 8601). ``status`` is

        * ``"healthy"``     - the endpoint answered 200,
        * ``"unhealthy"``   - the endpoint answered with another HTTP status,
        * ``"unreachable"`` - no HTTP answer at all; ``response_time`` is
          then the timeout in milliseconds.

    Never raises: a health probe must not take the dashboard down.
    """
    # Local import keeps this block self-contained; urllib.request already
    # loads urllib.error, so this costs nothing at runtime.
    import urllib.error

    endpoint = f"http://{host}:{port}"
    started = time.perf_counter()
    try:
        # Use urllib instead of requests for standard library compatibility
        request = urllib.request.Request(f"{endpoint}/api/health")
        with urllib.request.urlopen(request, timeout=timeout) as response:
            status_code = response.status
    except urllib.error.HTTPError as err:
        # urlopen raises for non-2xx answers. The environment *did* respond,
        # so report it as unhealthy with its real latency rather than as
        # unreachable with a made-up one.
        status_code = err.code
        err.close()
    except Exception:
        # Deliberate best-effort: refused connection, timeout, DNS failure,
        # malformed URL, ... all mean "could not get an answer".
        return {
            "healthy": False,
            "response_time": int(timeout * 1000),
            "status": "unreachable",
            "endpoint": endpoint,
            "last_check": datetime.now().isoformat(),
        }

    elapsed_ms = int((time.perf_counter() - started) * 1000)
    healthy = status_code == 200
    return {
        "healthy": healthy,
        "response_time": elapsed_ms,
        "status": "healthy" if healthy else "unhealthy",
        "endpoint": endpoint,
        "last_check": datetime.now().isoformat(),
    }
def _send_json_bytes(handler, status, body):
    """Write a complete JSON response (*body* is already-encoded bytes)."""
    handler.send_response(status)
    handler.send_header("Content-type", "application/json")
    handler.send_header("Content-Length", str(len(body)))
    handler.end_headers()
    handler.wfile.write(body)


def do_post(self):
    """Route a POST request; only ``/api/errors`` is handled.

    Args:
        self: The request handler instance serving the request.
    """
    print(f"POST request received: {self.path}")  # Debug logging
    if self.path == "/api/errors":
        handle_error_report(self)
    else:
        self.send_404()


def handle_error_report(self):
    """Handle frontend error reports.

    Expects a JSON body of the form
    ``{"error": {"message": ...}, "severity": ..., "context": ...}``
    (only ``error.message`` is required). The report is appended to
    ``alert_history`` (trimmed to the 100 most recent entries) and echoed to
    the console.

    Responses:
        200 ``{"status": "recorded"}`` on success,
        400 when the request itself is malformed,
        500 when recording the report fails.

    Args:
        self: The request handler instance serving the request.
    """
    # Phase 1: everything that depends on what the client sent. Problems
    # here are the client's fault and are answered with 400, not 500.
    try:
        content_length = int(self.headers["Content-Length"])
        if content_length < 0:
            # read(-1) would block until the client closes the connection.
            raise ValueError("negative Content-Length")
        error_data = json.loads(self.rfile.read(content_length).decode("utf-8"))
        message = error_data["error"]["message"]
        frontend_error = {
            "type": "frontend_error",
            # Coerced to str: the console alert upper-cases it and uses it
            # as a dict key, which arbitrary JSON values would break.
            "severity": str(error_data.get("severity", "medium")),
            "message": f"Frontend Error: {message}",
            "value": message,
            "context": error_data.get("context", {}),
            "timestamp": datetime.now().isoformat(),
            "source": "frontend",
        }
    except (AttributeError, KeyError, TypeError, ValueError) as exc:
        print(f"Rejected malformed frontend error report: {exc!r}")
        _send_json_bytes(self, 400, b'{"error": "Invalid error report"}')
        return

    # Phase 2: record and acknowledge.
    try:
        alert_history.append(frontend_error)
        # Keep only recent alerts; trimming in place keeps every existing
        # reference to the list valid (no rebinding of the global).
        del alert_history[:-100]
        # Trigger console alert for frontend errors
        send_console_alert(frontend_error)
        _send_json_bytes(self, 200, b'{"status": "recorded"}')
    except Exception as e:
        print(f"Error handling frontend error report: {e}")
        _send_json_bytes(self, 500, b'{"error": "Failed to record error"}')


# BaseHTTPRequestHandler dispatches on ``do_<METHOD>`` with the HTTP method
# in upper case, and these functions live outside the class body, so
# ``do_post`` was never reachable. Register it under the name the base
# class actually looks up; the old module-level names remain available.
EnhancedMonitoringHandler.do_POST = do_post
def run_server(host="", port=8080):
    """Run the enhanced monitoring server until interrupted.

    Args:
        host: Interface to bind to; the default ``""`` binds all interfaces.
        port: TCP port to listen on (default 8080).

    Blocks in ``serve_forever``. Ctrl+C stops the server cleanly; the
    listening socket is closed on every exit path.
    """
    httpd = HTTPServer((host, port), EnhancedMonitoringHandler)
    # NOTE(review): the emoji in these messages look mis-encoded (mojibake);
    # they are kept as found - restore them from the original file.
    print("π Enhanced Production Monitoring Dashboard started!")
    print(f"π Access URL: http://localhost:{port}")
    print("π Historical Data: Enabled")
    print("πΎ Time-Series Database: Active")
    print("π Auto-refresh: Every 10 seconds")
    print("βΉοΈ To stop: Press Ctrl+C")
    print("")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # A real newline so the message starts on its own line after "^C";
        # the previous "\\n" printed a literal backslash + n.
        print("\nπ Enhanced Dashboard stopped by user")
    finally:
        httpd.server_close()
# Reference point for the uptime figures reported by the health and metrics
# endpoints. Set at import time rather than only under the __main__ guard:
# otherwise importing this module and serving requests (e.g. from a test or
# another launcher) fails with NameError on ``start_time``.
start_time = time.time()

if __name__ == "__main__":
    run_server()