saemstunes committed on
Commit
232f382
·
verified ·
1 Parent(s): 1069dad

Create monitoring_system.py

Browse files
Files changed (1) hide show
  1. src/monitoring_system.py +70 -0
src/monitoring_system.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import psutil
3
+ from datetime import datetime, timedelta
4
+ from typing import Dict, List, Optional
5
+ import logging
6
+ from threading import Thread
7
+
8
class ComprehensiveMonitor:
    """In-memory monitor for inference metrics and host health.

    Inference records are plain dicts kept in ``inference_metrics`` (oldest
    first), capped at ``max_records`` entries. The aggregate helpers read two
    optional keys from each record:

    * ``'success'`` -- truthy when the request succeeded.
    * ``'processing_time_ms'`` -- numeric duration of the request.

    Every stored record also carries a ``'timestamp'`` key set by
    ``record_inference`` (naive local time from ``datetime.now()``).
    """

    DEFAULT_MAX_RECORDS = 1000
    DEFAULT_WINDOW_MINUTES = 30

    def __init__(self, max_records: int = DEFAULT_MAX_RECORDS):
        """Create an empty monitor.

        Args:
            max_records: Maximum number of inference records to retain;
                older records are dropped first. Must be at least 1.

        Raises:
            ValueError: If ``max_records`` is smaller than 1.
        """
        if max_records < 1:
            # A cap of 0 would make the ``[-max_records:]`` trim a no-op
            # (``lst[-0:]`` is the whole list), so reject it up front.
            raise ValueError("max_records must be at least 1")
        self.max_records = max_records
        self.inference_metrics = []
        self.system_metrics = []
        self.start_time = datetime.now()
        self.setup_logging()

    def setup_logging(self):
        """Attach a module-named logger to the instance as ``self.logger``."""
        self.logger = logging.getLogger(__name__)

    def record_inference(self, metrics: Dict):
        """Store one inference record stamped with the current time.

        A shallow copy of ``metrics`` is stored, so the caller's dict is not
        modified and may safely be reused for the next call. Nested values
        are still shared with the caller.

        Args:
            metrics: Arbitrary metric values for a single inference call.
        """
        record = dict(metrics)
        record['timestamp'] = datetime.now()
        self.inference_metrics.append(record)

        # Keep only the newest ``max_records`` entries. Rebinding (rather
        # than deleting in place) leaves any list a reader already holds
        # untouched.
        if len(self.inference_metrics) > self.max_records:
            self.inference_metrics = self.inference_metrics[-self.max_records:]

    def get_recent_metrics(self, minutes: int = 5) -> List[Dict]:
        """Return the records whose timestamp falls in the last ``minutes``.

        Args:
            minutes: Size of the look-back window, in minutes.

        Returns:
            Records strictly newer than ``now - minutes``, oldest first.
        """
        cutoff = datetime.now() - timedelta(minutes=minutes)
        return [m for m in self.inference_metrics if m['timestamp'] > cutoff]

    def get_average_response_time(
        self, minutes: int = DEFAULT_WINDOW_MINUTES
    ) -> float:
        """Return the mean ``processing_time_ms`` of recent successful calls.

        Only records with a truthy ``'success'`` value count; a record with
        no ``'success'`` key is treated as not successful here. Successful
        records that lack ``'processing_time_ms'`` are skipped instead of
        raising ``KeyError``.

        Args:
            minutes: Size of the look-back window, in minutes.

        Returns:
            The mean processing time, or ``0.0`` when no record qualifies.
        """
        durations = [
            m['processing_time_ms']
            for m in self.get_recent_metrics(minutes)
            if m.get('success', False) and 'processing_time_ms' in m
        ]
        if not durations:
            return 0.0

        return sum(durations) / len(durations)

    def get_error_rate(self, minutes: int = DEFAULT_WINDOW_MINUTES) -> float:
        """Return the percentage of recent records that failed.

        A record counts as an error when its ``'success'`` value is falsy; a
        record with no ``'success'`` key is treated as successful here.

        Args:
            minutes: Size of the look-back window, in minutes.

        Returns:
            A value between 0 and 100, or ``0.0`` when the window is empty.
        """
        recent_metrics = self.get_recent_metrics(minutes)
        if not recent_metrics:
            return 0.0

        errors = sum(1 for m in recent_metrics if not m.get('success', True))
        return (errors / len(recent_metrics)) * 100

    def get_uptime(self) -> float:
        """Return the seconds elapsed since this monitor was created."""
        return (datetime.now() - self.start_time).total_seconds()

    def get_system_health(self) -> Dict:
        """Return a snapshot of host resource usage and request statistics.

        Returns:
            A dict with the keys ``cpu_percent``, ``memory_percent``,
            ``disk_percent`` (usage of ``'/'``), ``uptime_seconds``,
            ``total_requests`` (number of retained records, not a lifetime
            count), ``error_rate`` and ``avg_response_time``.
        """
        return {
            # NOTE(review): per the psutil docs, the first non-blocking
            # cpu_percent() call returns a meaningless 0.0 -- confirm that
            # callers tolerate this on the first health check.
            "cpu_percent": psutil.cpu_percent(),
            "memory_percent": psutil.virtual_memory().percent,
            "disk_percent": psutil.disk_usage('/').percent,
            "uptime_seconds": self.get_uptime(),
            "total_requests": len(self.inference_metrics),
            "error_rate": self.get_error_rate(),
            "avg_response_time": self.get_average_response_time(),
        }