# File size: 2,517 Bytes
# Revision: b53ee19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
import json
from typing import Dict, Any


class MonitoringReportGenerator:
    """Write daily monitoring reports as JSON and summarise recent ones.

    Reports are stored in ``monitoring_dir`` as ``report_YYYYMMDD.json``,
    named after the local date on which they were generated. Generating a
    second report on the same day overwrites the first.
    """

    def __init__(self, monitoring_dir: Path):
        """Store the report directory, creating it (and parents) if missing."""
        self.monitoring_dir = Path(monitoring_dir)
        self.monitoring_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Convert values the ``json`` encoder cannot handle natively.

        NumPy scalars/arrays and pandas containers expose ``tolist()``, which
        yields plain Python objects. Anything else is rejected with the same
        ``TypeError`` the encoder would raise on its own.
        """
        to_list = getattr(value, "tolist", None)
        if callable(to_list):
            return to_list()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def generate_daily_report(self,
                              predictions_df: pd.DataFrame,
                              drift_report: Dict[str, Any],
                              performance_metrics: Dict[str, float]) -> Dict[str, Any]:
        """Build today's monitoring report, persist it as JSON and return it.

        Args:
            predictions_df: Predictions to summarise. If it has a
                ``prediction`` column, the value counts of that column are
                included in the report; otherwise the distribution is empty.
            drift_report: Stored verbatim under ``"drift"``. A truthy
                ``"drift_detected"`` entry turns the status into ``"warning"``.
            performance_metrics: Stored verbatim under ``"performance"``.

        Returns:
            The report dictionary that was written to
            ``report_YYYYMMDD.json`` inside ``monitoring_dir``.

        Raises:
            TypeError: If the report contains a value that cannot be
                converted to JSON. No file is written in that case.
        """
        # Take a single timestamp so the report date, the generation time and
        # the file name cannot disagree when the call straddles midnight.
        now = datetime.now()

        if 'prediction' in predictions_df.columns:
            distribution = predictions_df['prediction'].value_counts().to_dict()
        else:
            distribution = {}

        drift_detected = drift_report.get("drift_detected", False)
        report = {
            "report_date": now.strftime('%Y-%m-%d'),
            "generated_at": now.isoformat(),
            "predictions": {
                "total_predictions": len(predictions_df),
                "prediction_distribution": distribution,
            },
            "drift": drift_report,
            "performance": performance_metrics,
            "status": "warning" if drift_detected else "healthy",
        }

        # Serialise before touching the file: a failure here must not leave a
        # truncated report behind (it would break later summary reads).
        payload = json.dumps(report, indent=2, default=self._json_default)
        report_path = self.monitoring_dir / f"report_{now.strftime('%Y%m%d')}.json"
        report_path.write_text(payload, encoding="utf-8")

        return report

    def get_weekly_summary(self) -> Dict[str, Any]:
        """Summarise the daily reports of the last 7 calendar days.

        The window ends today (inclusive), so a report generated earlier
        today is part of the summary. Days without a report file are skipped.

        Returns:
            ``{"status": "no_data", "period": "last_7_days"}`` when no report
            file exists in the window. Otherwise a dictionary with the number
            of reports found, the number of days with drift, the average
            prediction count per report, and an overall status that is
            ``"healthy"`` only if every report is healthy.
        """
        today = datetime.now()

        reports = []
        # Walk from the oldest day (6 days ago) up to and including today.
        for days_back in range(6, -1, -1):
            stamp = (today - timedelta(days=days_back)).strftime('%Y%m%d')
            report_path = self.monitoring_dir / f"report_{stamp}.json"
            if report_path.exists():
                with open(report_path, 'r', encoding="utf-8") as f:
                    reports.append(json.load(f))

        if not reports:
            return {"status": "no_data", "period": "last_7_days"}

        drift_days = sum(
            1 for r in reports if r.get('drift', {}).get('drift_detected', False)
        )
        total_predictions = sum(
            r.get('predictions', {}).get('total_predictions', 0) for r in reports
        )
        all_healthy = all(r.get('status') == 'healthy' for r in reports)

        return {
            "period": "last_7_days",
            "total_reports": len(reports),
            "days_with_drift": drift_days,
            "avg_predictions_per_day": total_predictions / len(reports),
            "status": "healthy" if all_healthy else "needs_attention",
        }