# Navya-Sree's picture
# Create utils/monitoring.py
# 52e8fba verified
import json
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import pandas as pd
class MonitoringSystem:
    """
    System for monitoring agent performance and tracking metrics.

    Events are stored as a JSON array in ``log_file``; only the newest
    ``max_entries`` events are retained.
    """

    def __init__(self, log_file: str = "logs/monitoring_logs.json", max_entries: int = 100):
        """
        Set up the monitor and make sure the log file exists.

        Args:
            log_file: Path of the JSON file that stores the events
            max_entries: Maximum number of events kept in the file

        Raises:
            ValueError: If max_entries is smaller than 1
        """
        # A limit of 0 would make the ``logs[-0:]`` slice keep everything.
        if max_entries < 1:
            raise ValueError(f"max_entries must be at least 1, got {max_entries}")
        self.log_file = log_file
        self.max_entries = max_entries
        self._ensure_log_directory()

    def _ensure_log_directory(self):
        """Create the log directory and an empty log file if they don't exist."""
        log_dir = os.path.dirname(self.log_file)
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        # Initialize log file if it doesn't exist
        if not os.path.exists(self.log_file):
            self._write_logs([])

    def _read_logs(self) -> List[Dict[str, Any]]:
        """
        Load every stored event from the log file.

        Returns:
            List of log entries

        Raises:
            OSError: If the file cannot be opened
            ValueError: If the content is not valid JSON or not a JSON array
        """
        with open(self.log_file, 'r', encoding='utf-8') as f:
            logs = json.load(f)
        if not isinstance(logs, list):
            raise ValueError(f"expected a JSON array in {self.log_file}")
        return logs

    def _write_logs(self, logs: List[Dict[str, Any]]) -> None:
        """
        Replace the content of the log file with ``logs``.

        Raises:
            TypeError: If an entry is not JSON-serializable
            OSError: If the file cannot be written
        """
        # Serialize before opening: opening with 'w' truncates the file, so a
        # serialization error halfway through would leave a corrupt log behind.
        payload = json.dumps(logs, indent=2)
        with open(self.log_file, 'w', encoding='utf-8') as f:
            f.write(payload)

    @staticmethod
    def _entry_epoch(entry: Any) -> Optional[float]:
        """Return the entry's timestamp as epoch seconds, or None if it has no usable one."""
        try:
            return datetime.fromisoformat(entry['timestamp']).timestamp()
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            return None

    def log_generation(self, data: Dict[str, Any]):
        """
        Log a code generation event.

        The caller's dictionary is not modified. Failures are reported on
        stdout and never raised, so logging cannot break the caller.

        Args:
            data: Dictionary containing generation details
        """
        try:
            # Read existing logs, recovering from a missing or corrupt file so
            # that one bad state does not disable logging permanently.
            try:
                logs = self._read_logs()
            except FileNotFoundError:
                logs = []
            except ValueError as e:
                print(f"Resetting unreadable log file {self.log_file}: {e}")
                logs = []

            # Work on a copy and add a timestamp if not present
            entry = dict(data)
            if 'timestamp' not in entry:
                entry['timestamp'] = datetime.now().isoformat()
            logs.append(entry)

            # Write back (limit to the newest max_entries entries)
            self._write_logs(logs[-self.max_entries:])
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error logging generation: {e}")

    def get_metrics(self, n_days: Optional[int] = 7) -> List[Dict]:
        """
        Get metrics from the last n days.

        Entries whose timestamp is missing or cannot be parsed are skipped
        when filtering by date.

        Args:
            n_days: Number of days to look back; None or 0 returns every entry

        Returns:
            List of log entries (empty if the log file cannot be read)
        """
        try:
            logs = self._read_logs()
        except (OSError, ValueError) as e:
            print(f"Error reading metrics: {e}")
            return []

        if not n_days:
            return logs

        cutoff = datetime.now().timestamp() - (n_days * 24 * 60 * 60)
        recent = []
        for entry in logs:
            stamp = self._entry_epoch(entry)
            if stamp is not None and stamp > cutoff:
                recent.append(entry)
        return recent

    def calculate_statistics(self) -> Dict[str, Any]:
        """
        Calculate statistics from logs.

        Returns:
            Dictionary with statistics. "total_generations", "models_used",
            "unique_prompts" and "success_rate" are always present; the score
            keys appear only when the corresponding column exists in the logs.
            Numeric values are plain Python numbers.
        """
        logs = self.get_metrics(n_days=None)
        if not logs:
            return {
                "total_generations": 0,
                "models_used": {},
                "unique_prompts": 0,
                # Kept for callers that already read this key on empty logs.
                "average_score": 0,
                "success_rate": 0
            }

        # Convert to DataFrame for easier analysis
        df = pd.DataFrame(logs)
        total = len(df)
        has_review = 'review_score' in df.columns

        models_used = {}
        if 'model' in df.columns:
            models_used = {
                model: int(count)
                for model, count in df['model'].value_counts().items()
            }

        stats = {
            "total_generations": total,
            "models_used": models_used,
            "unique_prompts": int(df['prompt'].nunique()) if 'prompt' in df.columns else 0
        }

        # Calculate average scores if available (float() drops numpy scalar types)
        if has_review:
            review_scores = df['review_score']
            stats['average_review_score'] = float(review_scores.mean())
            stats['best_score'] = float(review_scores.max())
            stats['worst_score'] = float(review_scores.min())

        if 'test_score' in df.columns:
            test_scores = df['test_score'].dropna()
            if len(test_scores) > 0:
                stats['average_test_score'] = float(test_scores.mean())

        # Calculate success rate (a generation counts as successful when its
        # review score is greater than 0)
        successful = int((df['review_score'] > 0).sum()) if has_review else 0
        stats['success_rate'] = (successful / total) * 100 if total > 0 else 0
        return stats

    def export_logs(self, format: str = "csv") -> str:
        """
        Export logs in specified format.

        Args:
            format: Output format (csv, json)

        Returns:
            Path to exported file

        Raises:
            ValueError: If the format is not supported
        """
        # Validate the format before doing any work.
        if format == "json":
            return self.log_file
        if format != "csv":
            raise ValueError(f"Unsupported format: {format}")

        # Swap only the real extension; never let the export path collide with
        # the log file itself, or the CSV would overwrite the log.
        root, ext = os.path.splitext(self.log_file)
        if ext.lower() == '.csv':
            export_path = self.log_file + '.csv'
        else:
            export_path = root + '.csv'

        pd.DataFrame(self.get_metrics(n_days=None)).to_csv(export_path, index=False)
        return export_path