snikhilesh commited on
Commit
3e68886
·
verified ·
1 Parent(s): 38b1019

Deploy test_server_monitoring.py to backend/ directory

Browse files
Files changed (1) hide show
  1. backend/test_server_monitoring.py +317 -0
backend/test_server_monitoring.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simplified Test Server for Monitoring Load Testing
3
+ Includes only monitoring infrastructure without heavy dependencies
4
+ """
5
+
6
# Standard library
import logging
import time
import uuid
from datetime import datetime
from typing import Dict, Any

# Third-party
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

# Project-local monitoring modules
from monitoring_service import get_monitoring_service
from model_versioning import get_versioning_system
from production_logging import get_medical_logger
from compliance_reporting import get_compliance_system
from admin_endpoints import admin_router
20
+
21
# Configure logging once at import time; INFO level for the whole process.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
24
+
25
# Initialize FastAPI app.
# Deliberately lightweight: only the monitoring stack is wired in, so load
# tests exercise the monitoring infrastructure without heavy ML dependencies.
app = FastAPI(
    title="Medical AI Platform - Monitoring Test Server",
    description="Simplified server for monitoring infrastructure load testing",
    version="2.0.0"
)
31
+
32
# CORS configuration — fully open, appropriate only for a test server.
# NOTE(review): wildcard allow_origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the CORS spec forbids
# "Access-Control-Allow-Origin: *" with credentials); tighten before any
# production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
40
+
41
# Initialize monitoring and infrastructure services at import time.
# Each get_* factory is project-local; presumably they return process-wide
# singletons shared with admin_endpoints — TODO confirm against those modules.
monitoring_service = get_monitoring_service()
versioning_system = get_versioning_system()
medical_logger = get_medical_logger("medical_ai_test")
compliance_system = get_compliance_system()

logger.info("Monitoring test server initialized")
48
+
49
# In-memory job tracking for testing.
# Maps job id -> job record; the dashboard's pipeline statistics read each
# record's "status" key and expect "completed" / "failed" / "processing".
job_tracker: Dict[str, Dict[str, Any]] = {}
51
+
52
# Add monitoring middleware
@app.middleware("http")
async def monitoring_middleware(request: Request, call_next):
    """Monitoring middleware for request tracking.

    Logs request start/end with a correlation id, records per-endpoint
    latency and status via the monitoring service, and records errors
    before re-raising them.

    Fix: latency is measured with time.perf_counter() (a monotonic clock)
    instead of datetime.utcnow() — utcnow() is deprecated since Python 3.12
    and, being wall-clock based, can produce negative or skewed latencies
    when the system clock is adjusted.
    """
    start = time.perf_counter()
    # Correlation id attached to every log line emitted for this request.
    request_id = str(uuid.uuid4())

    medical_logger.info("Request received", {
        "request_id": request_id,
        "method": request.method,
        "path": request.url.path,
        "client": request.client.host if request.client else "unknown"
    })

    try:
        response = await call_next(request)
        latency_ms = (time.perf_counter() - start) * 1000

        monitoring_service.track_request(
            endpoint=request.url.path,
            latency_ms=latency_ms,
            status_code=response.status_code
        )

        medical_logger.info("Request completed", {
            "request_id": request_id,
            "method": request.method,
            "path": request.url.path,
            "status_code": response.status_code,
            "latency_ms": round(latency_ms, 2)
        })

        return response

    except Exception as e:
        latency_ms = (time.perf_counter() - start) * 1000

        monitoring_service.track_error(
            endpoint=request.url.path,
            error_type=type(e).__name__,
            error_message=str(e)
        )

        medical_logger.error("Request failed", {
            "request_id": request_id,
            "method": request.method,
            "path": request.url.path,
            "error": str(e),
            "error_type": type(e).__name__,
            "latency_ms": round(latency_ms, 2)
        })

        # Re-raise so FastAPI's normal exception handling still runs.
        raise
107
+
108
# Startup event handler
@app.on_event("startup")
async def startup_event():
    """Initialize all services on startup"""

    medical_logger.info("Starting monitoring test server initialization", {
        "version": "2.0.0",
        "timestamp": datetime.utcnow().isoformat()
    })

    # Bring the monitoring service online before anything reports metrics.
    monitoring_service.start_monitoring()
    medical_logger.info("Monitoring service initialized", {
        "cache_enabled": True,
        "alert_threshold": 0.05
    })

    # Register test model versions — all at v1.0.0 and sourced from HuggingFace,
    # so only the ids vary.
    test_model_ids = [
        "bio_clinical_bert",
        "biogpt",
        "pubmed_bert",
        "hubert_ecg",
        "monai_unetr",
        "medgemma_2b",
    ]
    for test_model_id in test_model_ids:
        versioning_system.register_model_version(
            model_id=test_model_id,
            version="1.0.0",
            metadata={"source": "HuggingFace"}
        )

    medical_logger.info("Model versioning initialized", {
        "total_models": len(test_model_ids)
    })

    # Smoke-test the health check so a broken monitoring stack surfaces in logs
    # rather than on the first live request.
    try:
        health_status = monitoring_service.get_system_health()
        medical_logger.info("Health check successful", {
            "status": health_status["status"],
            "components_ready": True
        })
    except Exception as e:
        medical_logger.error("Health check failed during startup", {
            "error": str(e)
        })

    medical_logger.info("Monitoring test server startup complete", {
        "status": "ready",
        "timestamp": datetime.utcnow().isoformat()
    })
162
+
163
# Include admin router — mounts the project's admin endpoints on the app
# (listed as "/admin/*" by the root endpoint below).
app.include_router(admin_router)
165
+
166
+ @app.get("/health")
167
+ async def health_check():
168
+ """Basic health check endpoint"""
169
+ system_health = monitoring_service.get_system_health()
170
+
171
+ return {
172
+ "status": system_health["status"],
173
+ "components": {
174
+ "monitoring": "active",
175
+ "versioning": "active",
176
+ "logging": "active",
177
+ "compliance": "active"
178
+ },
179
+ "monitoring": {
180
+ "uptime_seconds": system_health["uptime_seconds"],
181
+ "error_rate": system_health["error_rate"],
182
+ "active_alerts": system_health["active_alerts"],
183
+ "critical_alerts": system_health["critical_alerts"]
184
+ },
185
+ "timestamp": datetime.utcnow().isoformat()
186
+ }
187
+
188
+ @app.get("/health/dashboard")
189
+ async def get_health_dashboard():
190
+ """Comprehensive health dashboard endpoint"""
191
+ try:
192
+ system_health = monitoring_service.get_system_health()
193
+ cache_stats = monitoring_service.get_cache_statistics()
194
+ recent_alerts = monitoring_service.get_recent_alerts(limit=10)
195
+
196
+ # Get model performance metrics
197
+ model_metrics = {}
198
+ try:
199
+ active_models = versioning_system.list_model_versions()
200
+ for model_info in active_models[:10]:
201
+ model_id = model_info.get("model_id")
202
+ if model_id:
203
+ perf = versioning_system.get_model_performance(model_id)
204
+ if perf:
205
+ model_metrics[model_id] = {
206
+ "version": model_info.get("version", "unknown"),
207
+ "total_inferences": perf.get("total_inferences", 0),
208
+ "avg_latency_ms": perf.get("avg_latency_ms", 0),
209
+ "error_rate": perf.get("error_rate", 0.0),
210
+ "last_used": perf.get("last_used", "never")
211
+ }
212
+ except Exception as e:
213
+ medical_logger.warning("Failed to get model metrics", {"error": str(e)})
214
+
215
+ # Pipeline statistics
216
+ pipeline_stats = {
217
+ "total_jobs_processed": len(job_tracker),
218
+ "completed_jobs": sum(1 for job in job_tracker.values() if job.get("status") == "completed"),
219
+ "failed_jobs": sum(1 for job in job_tracker.values() if job.get("status") == "failed"),
220
+ "processing_jobs": sum(1 for job in job_tracker.values() if job.get("status") == "processing"),
221
+ "success_rate": 0.0
222
+ }
223
+
224
+ if pipeline_stats["total_jobs_processed"] > 0:
225
+ pipeline_stats["success_rate"] = (
226
+ pipeline_stats["completed_jobs"] / pipeline_stats["total_jobs_processed"]
227
+ )
228
+
229
+ # Synthesis statistics (mock for testing)
230
+ synthesis_stats = {
231
+ "total_syntheses": 0,
232
+ "avg_confidence": 0.0,
233
+ "requiring_review": 0,
234
+ "avg_processing_time_ms": 0
235
+ }
236
+
237
+ # Compliance overview
238
+ compliance_overview = {
239
+ "hipaa_compliant": True,
240
+ "gdpr_compliant": True,
241
+ "audit_logging_active": True,
242
+ "phi_removal_active": True,
243
+ "encryption_enabled": True
244
+ }
245
+
246
+ dashboard = {
247
+ "status": "operational" if system_health["status"] == "operational" else "degraded",
248
+ "timestamp": datetime.utcnow().isoformat(),
249
+
250
+ "system": {
251
+ "uptime_seconds": system_health["uptime_seconds"],
252
+ "uptime_human": f"{system_health['uptime_seconds'] // 3600}h {(system_health['uptime_seconds'] % 3600) // 60}m",
253
+ "error_rate": system_health["error_rate"],
254
+ "total_requests": system_health["total_requests"],
255
+ "error_threshold": 0.05,
256
+ "status": system_health["status"]
257
+ },
258
+
259
+ "pipeline": pipeline_stats,
260
+
261
+ "models": {
262
+ "total_registered": len(model_metrics),
263
+ "performance": model_metrics
264
+ },
265
+
266
+ "synthesis": synthesis_stats,
267
+
268
+ "cache": cache_stats,
269
+
270
+ "alerts": {
271
+ "active_count": system_health["active_alerts"],
272
+ "critical_count": system_health["critical_alerts"],
273
+ "recent": recent_alerts
274
+ },
275
+
276
+ "compliance": compliance_overview,
277
+
278
+ "components": {
279
+ "monitoring_system": "operational",
280
+ "versioning_system": "operational",
281
+ "logging_system": "operational",
282
+ "compliance_reporting": "operational",
283
+ "cache_service": "operational"
284
+ }
285
+ }
286
+
287
+ return dashboard
288
+
289
+ except Exception as e:
290
+ medical_logger.error("Dashboard generation failed", {
291
+ "error": str(e),
292
+ "timestamp": datetime.utcnow().isoformat()
293
+ })
294
+
295
+ return {
296
+ "status": "error",
297
+ "timestamp": datetime.utcnow().isoformat(),
298
+ "error": "Failed to generate complete dashboard",
299
+ "message": str(e)
300
+ }
301
+
302
+ @app.get("/")
303
+ async def root():
304
+ """Root endpoint"""
305
+ return {
306
+ "message": "Medical AI Platform - Monitoring Test Server",
307
+ "version": "2.0.0",
308
+ "endpoints": {
309
+ "health": "/health",
310
+ "dashboard": "/health/dashboard",
311
+ "admin": "/admin/*"
312
+ }
313
+ }
314
+
315
+ if __name__ == "__main__":
316
+ import uvicorn
317
+ uvicorn.run(app, host="0.0.0.0", port=7860)