gpu_monitoring_system / performance_optimizer.py
meccatronis's picture
Upload performance_optimizer.py with huggingface_hub
93936b3 verified
raw
history blame
23.8 kB
#!/usr/bin/env python3
"""
Performance Optimization Module
Provides system optimization features, resource monitoring, and performance
analytics for the GPU monitoring system.
"""
import time
import psutil
import logging
import threading
import json
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from pathlib import Path
import os
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class SystemMetrics:
    """A single point-in-time sample of host resource usage.

    Instances are built by ``SystemOptimizer.collect_system_metrics``; the
    per-field notes below describe what that method stores in each field.
    Field order is part of the generated constructor signature.
    """

    # Sample time as returned by time.time().
    timestamp: float
    # Value returned by psutil.cpu_percent().
    cpu_percent: float
    # ``percent`` attribute of psutil.virtual_memory().
    memory_percent: float
    # used / total * 100 for the '/' mount point.
    disk_percent: float
    # ``bytes_sent`` from psutil.net_io_counters().
    network_sent: int
    # ``bytes_recv`` from psutil.net_io_counters().
    network_recv: int
    # Number of PIDs reported by psutil.pids().
    process_count: int
    # Value returned by psutil.getloadavg().
    load_avg: Tuple[float, float, float]
@dataclass
class GPUMetrics:
    """A single point-in-time sample of GPU state.

    NOTE(review): this module only ever fills these fields with placeholder
    values (see ``SystemOptimizer.collect_gpu_metrics``), so the units noted
    below are assumptions to confirm against the real GPU backend.
    Field order is part of the generated constructor signature.
    """

    # Sample time as returned by time.time().
    timestamp: float
    # presumably degrees Celsius - TODO confirm
    temperature: float
    # presumably utilisation in percent - TODO confirm
    load: float
    # presumably watts - TODO confirm
    power_draw: float
    # presumably RPM - TODO confirm
    fan_speed: int
    # presumably percent of memory in use - TODO confirm
    memory_usage: float
    # meaning/scale not established in this module - TODO confirm
    efficiency: float
    # presumably MHz - TODO confirm
    core_clock: int
    # presumably MHz - TODO confirm
    memory_clock: int
@dataclass
class OptimizationProfile:
    """A named bundle of tuning settings that can be applied as a unit.

    ``SystemOptimizer.apply_profile`` reads the optional keys
    ``cpu_governor``, ``gpu_power_target``, ``fan_curve`` and
    ``monitoring_interval`` from ``settings``; other keys are carried along
    untouched.
    """

    # Identifier of the profile (the key it is stored under when loaded).
    name: str
    # Human-readable summary of what the profile is for.
    description: str
    # Free-form setting name -> value mapping.
    settings: Dict[str, Any]
    # Disabled profiles are rejected by SystemOptimizer.apply_profile.
    enabled: bool = True
class SystemOptimizer:
    """Applies optimization profiles and records system/GPU performance history.

    Profiles are read from a JSON config file (created with built-in defaults
    when it does not exist). A background thread started by
    ``start_monitoring`` periodically appends samples to ``system_history``
    and ``gpu_history``, which feed the analytics, recommendation and report
    methods.
    """

    # Fallbacks used when the config's "monitoring" section is missing or invalid.
    DEFAULT_MONITOR_INTERVAL = 2.0
    DEFAULT_MAX_HISTORY = 1000

    def __init__(self, config_file: str = "config/optimization.json"):
        """Create the optimizer and load (or create) its configuration.

        Args:
            config_file: Path of the JSON configuration file.
        """
        self.config_file = config_file
        self.profiles = {}
        self.current_profile = None
        self.monitoring = False
        self.monitor_thread = None

        # Settings of the monitoring loop; overwritten by the config file's
        # "monitoring" section and by update_monitoring_interval().
        self.monitoring_config = {
            "interval": self.DEFAULT_MONITOR_INTERVAL,
            "max_history": self.DEFAULT_MAX_HISTORY,
        }
        # Set by stop_monitoring() so the loop's wait ends immediately.
        self._stop_event = threading.Event()

        # Performance data
        self.system_history = []
        self.gpu_history = []

        # Load configuration
        self.load_config()

    def load_config(self):
        """Load profiles and monitoring settings from ``self.config_file``.

        * File exists and is valid: its contents are applied.
        * File exists but cannot be read/parsed: the error is logged and the
          built-in defaults are used in memory; the file is left untouched so
          a user's (possibly hand-edited) configuration is never clobbered.
        * File does not exist: it is created from the built-in defaults, and
          those defaults are applied. A failure to write the file is logged
          and does not prevent the defaults from being used.
        """
        config_path = Path(self.config_file)

        if config_path.exists():
            try:
                with open(config_path, 'r') as f:
                    config = json.load(f)
                self._apply_config(config)
                logger.info(f"Loaded {len(self.profiles)} optimization profiles")
            except Exception as e:
                logger.error(f"Error loading optimization config: {e}")
                logger.warning(
                    "Using built-in default profiles; existing config file left unchanged"
                )
                self._apply_config(self._default_config())
            return

        try:
            # ``or`` keeps this working with overrides that return None.
            config = self.create_default_config() or self._default_config()
        except OSError as e:
            logger.error(f"Error creating default optimization config: {e}")
            config = self._default_config()
        self._apply_config(config)

    def _apply_config(self, config: Dict[str, Any]) -> None:
        """Populate profiles, current profile and monitoring settings from *config*.

        Raises whatever the malformed input triggers (``KeyError``,
        ``AttributeError``, ...) before any state has been modified.
        """
        # Build everything first so a malformed entry cannot leave the
        # optimizer with a half-loaded set of profiles.
        loaded = {}
        for name, profile_data in config.get('profiles', {}).items():
            loaded[name] = OptimizationProfile(
                name=name,
                description=profile_data['description'],
                settings=profile_data['settings'],
                enabled=profile_data.get('enabled', True),
            )
        monitoring = config.get('monitoring', {})
        default_profile = config.get('default_profile', 'balanced')

        self.profiles.update(loaded)
        if isinstance(monitoring, dict):
            self.monitoring_config.update(monitoring)
        if default_profile in self.profiles:
            self.current_profile = self.profiles[default_profile]

    @staticmethod
    def _default_config() -> Dict[str, Any]:
        """Return a fresh copy of the built-in default configuration."""
        return {
            "profiles": {
                "power_saving": {
                    "name": "Power Saving",
                    "description": "Optimize for minimum power consumption",
                    "settings": {
                        "cpu_governor": "powersave",
                        "gpu_power_target": 150,
                        "fan_curve": "silent",
                        "monitoring_interval": 5.0,
                        "data_retention": 3600
                    },
                    "enabled": True
                },
                "balanced": {
                    "name": "Balanced",
                    "description": "Balance between performance and power",
                    "settings": {
                        "cpu_governor": "ondemand",
                        "gpu_power_target": 200,
                        "fan_curve": "balanced",
                        "monitoring_interval": 2.0,
                        "data_retention": 7200
                    },
                    "enabled": True
                },
                "performance": {
                    "name": "Performance",
                    "description": "Optimize for maximum performance",
                    "settings": {
                        "cpu_governor": "performance",
                        "gpu_power_target": 250,
                        "fan_curve": "performance",
                        "monitoring_interval": 1.0,
                        "data_retention": 14400
                    },
                    "enabled": True
                },
                "gaming": {
                    "name": "Gaming",
                    "description": "Optimize for gaming performance",
                    "settings": {
                        "cpu_governor": "performance",
                        "gpu_power_target": 250,
                        "fan_curve": "performance",
                        "monitoring_interval": 0.5,
                        "data_retention": 28800,
                        "disable_screen_blank": True,
                        "disable_power_save": True
                    },
                    "enabled": True
                }
            },
            "default_profile": "balanced",
            "monitoring": {
                "enabled": True,
                "interval": 2.0,
                "max_history": 1000
            }
        }

    def create_default_config(self):
        """Write the built-in default configuration to ``self.config_file``.

        Parent directories are created as needed and an existing file is
        overwritten.

        Returns:
            The configuration dictionary that was written.

        Raises:
            OSError: If the directory or file cannot be written.
        """
        default_config = self._default_config()

        # Save default config
        config_path = Path(self.config_file)
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, 'w') as f:
            json.dump(default_config, f, indent=2)

        logger.info("Created default optimization configuration")
        return default_config

    def apply_profile(self, profile_name: str) -> bool:
        """Apply the named optimization profile.

        Each of ``cpu_governor``, ``gpu_power_target``, ``fan_curve`` and
        ``monitoring_interval`` is applied when present (and truthy) in the
        profile's settings. The individual setters log their own failures
        instead of raising, so a ``True`` result means the profile was
        selected, not that every hardware knob was changed.

        Returns:
            ``True`` if the profile exists, is enabled and was applied;
            ``False`` otherwise.
        """
        if profile_name not in self.profiles:
            logger.error(f"Profile '{profile_name}' not found")
            return False

        profile = self.profiles[profile_name]
        if not profile.enabled:
            logger.error(f"Profile '{profile_name}' is disabled")
            return False

        try:
            # Apply CPU governor
            cpu_governor = profile.settings.get('cpu_governor')
            if cpu_governor:
                self.set_cpu_governor(cpu_governor)

            # Apply GPU power target
            gpu_power = profile.settings.get('gpu_power_target')
            if gpu_power:
                self.set_gpu_power_target(gpu_power)

            # Apply fan curve
            fan_curve = profile.settings.get('fan_curve')
            if fan_curve:
                self.set_fan_curve(fan_curve)

            # Apply monitoring settings
            monitoring_interval = profile.settings.get('monitoring_interval')
            if monitoring_interval:
                self.update_monitoring_interval(monitoring_interval)

            self.current_profile = profile
            logger.info(f"Applied optimization profile: {profile.name}")
            return True
        except Exception as e:
            logger.error(f"Error applying profile '{profile_name}': {e}")
            return False

    def set_cpu_governor(self, governor: str):
        """Set the CPU frequency governor on every CPU that exposes cpufreq.

        Falls back to ``'ondemand'`` when *governor* is not offered by the
        kernel, and gives up (with an error log) when that is not offered
        either. Failures are logged, never raised.
        """
        cpu_root = Path('/sys/devices/system/cpu')
        try:
            # Get available governors
            with open(cpu_root / 'cpu0' / 'cpufreq' / 'scaling_available_governors', 'r') as f:
                available = f.read().split()

            if governor not in available:
                if 'ondemand' not in available:
                    logger.error(
                        f"Error setting CPU governor: neither '{governor}' nor "
                        f"'ondemand' is available (available: {available})"
                    )
                    return
                logger.warning(f"Governor '{governor}' not available, using 'ondemand'")
                governor = 'ondemand'

            # Enumerate the sysfs entries instead of counting CPUs: this skips
            # CPUs without a cpufreq directory rather than aborting on them.
            governor_files = sorted(cpu_root.glob('cpu[0-9]*/cpufreq/scaling_governor'))
        except OSError as e:
            logger.error(f"Error setting CPU governor: {e}")
            return

        updated = 0
        for governor_file in governor_files:
            # .../cpu<N>/cpufreq/scaling_governor -> "<N>"
            cpu = governor_file.parent.parent.name[len('cpu'):]
            try:
                with open(governor_file, 'w') as f:
                    f.write(governor)
                updated += 1
            except PermissionError:
                logger.warning(f"Cannot set governor for CPU {cpu}, insufficient permissions")
            except OSError as e:
                logger.warning(f"Cannot set governor for CPU {cpu}: {e}")

        # Only report success when at least one write actually went through.
        if updated:
            logger.info(f"Set CPU governor to: {governor}")
        else:
            logger.error(f"Error setting CPU governor: could not set '{governor}' on any CPU")

    def set_gpu_power_target(self, power_target: int):
        """Set GPU power target.

        NOTE: placeholder - currently only logs the requested value.
        """
        try:
            # This would need to be implemented based on the specific GPU driver
            # For AMD GPUs, this might involve writing to sysfs files
            logger.info(f"Set GPU power target to: {power_target}W")
        except Exception as e:
            logger.error(f"Error setting GPU power target: {e}")

    def set_fan_curve(self, curve_name: str):
        """Set fan curve profile.

        NOTE: placeholder - currently only logs the requested curve name.
        """
        try:
            # This would integrate with the fan controller
            logger.info(f"Set fan curve to: {curve_name}")
        except Exception as e:
            logger.error(f"Error setting fan curve: {e}")

    def update_monitoring_interval(self, interval: float):
        """Change the delay between monitoring samples.

        The new value is picked up by ``monitor_loop`` on its next iteration.
        Values that are not positive, finite numbers are ignored with a
        warning.
        """
        if not self._is_positive_number(interval):
            logger.warning(f"Ignoring invalid monitoring interval: {interval!r}")
            return
        self.monitoring_config['interval'] = interval
        logger.info(f"Updated monitoring interval to: {interval}s")

    @staticmethod
    def _is_positive_number(value) -> bool:
        """Return True for a finite int/float greater than zero (bools excluded)."""
        return (
            isinstance(value, (int, float))
            and not isinstance(value, bool)
            and 0 < value < float('inf')
        )

    def _monitoring_settings(self) -> Tuple[float, int]:
        """Return ``(interval, max_history)``, substituting defaults for invalid values."""
        interval = self.monitoring_config.get('interval', self.DEFAULT_MONITOR_INTERVAL)
        max_history = self.monitoring_config.get('max_history', self.DEFAULT_MAX_HISTORY)
        if not self._is_positive_number(interval):
            interval = self.DEFAULT_MONITOR_INTERVAL
        if not isinstance(max_history, int) or isinstance(max_history, bool) or max_history < 1:
            max_history = self.DEFAULT_MAX_HISTORY
        return interval, max_history

    @staticmethod
    def _trim_history(history: list, max_history: int) -> None:
        """Drop the oldest entries of *history* in place so at most *max_history* remain."""
        excess = len(history) - max_history
        if excess > 0:
            del history[:excess]

    def start_monitoring(self):
        """Start the background monitoring thread (no-op if already running)."""
        if self.monitoring:
            return

        self._stop_event.clear()
        self.monitoring = True
        self.monitor_thread = threading.Thread(target=self.monitor_loop, daemon=True)
        self.monitor_thread.start()
        logger.info("Started performance monitoring")

    def stop_monitoring(self):
        """Stop the monitoring thread and wait for it to finish.

        The stop event interrupts the loop's wait, so this only blocks for
        as long as a sample that is already being collected needs.
        """
        self.monitoring = False
        self._stop_event.set()
        if self.monitor_thread:
            self.monitor_thread.join()
            self.monitor_thread = None
        logger.info("Stopped performance monitoring")

    def monitor_loop(self):
        """Collect system and GPU samples until monitoring is stopped.

        The interval and history limit are re-read from
        ``self.monitoring_config`` on every iteration, so changes made by
        ``update_monitoring_interval`` take effect while the loop runs.
        After a failed iteration the loop backs off for 5 seconds.
        """
        while self.monitoring:
            delay = 5  # back-off used when the iteration below fails
            try:
                interval, max_history = self._monitoring_settings()

                # Collect system metrics
                self.system_history.append(self.collect_system_metrics())
                self._trim_history(self.system_history, max_history)

                # Collect GPU metrics (would integrate with GPU monitoring)
                gpu_metrics = self.collect_gpu_metrics()
                if gpu_metrics is not None:
                    self.gpu_history.append(gpu_metrics)
                    self._trim_history(self.gpu_history, max_history)

                delay = interval
            except Exception as e:
                logger.error(f"Error in monitoring loop: {e}")

            # Unlike time.sleep(), this returns as soon as stop_monitoring()
            # sets the event.
            if self._stop_event.wait(delay):
                break

    def collect_system_metrics(self) -> "SystemMetrics":
        """Collect current system metrics via psutil.

        Note that ``psutil.cpu_percent(interval=1)`` blocks for the length of
        its sampling interval, so every call takes at least that long.
        """
        # CPU metrics
        cpu_percent = psutil.cpu_percent(interval=1)
        load_avg = psutil.getloadavg()

        # Memory metrics
        memory = psutil.virtual_memory()
        memory_percent = memory.percent

        # Disk metrics
        disk = psutil.disk_usage('/')
        disk_percent = (disk.used / disk.total) * 100

        # Network metrics
        network = psutil.net_io_counters()

        # Process count
        process_count = len(psutil.pids())

        return SystemMetrics(
            timestamp=time.time(),
            cpu_percent=cpu_percent,
            memory_percent=memory_percent,
            disk_percent=disk_percent,
            network_sent=network.bytes_sent,
            network_recv=network.bytes_recv,
            process_count=process_count,
            load_avg=load_avg
        )

    def collect_gpu_metrics(self) -> Optional["GPUMetrics"]:
        """Collect current GPU metrics.

        NOTE: placeholder - returns fixed dummy values until a real GPU
        backend is integrated.

        Returns:
            A ``GPUMetrics`` sample, or ``None`` if collection failed.
        """
        try:
            # This would integrate with the GPU monitoring system
            # For now, return dummy data
            return GPUMetrics(
                timestamp=time.time(),
                temperature=65.0,
                load=45.0,
                power_draw=150.0,
                fan_speed=1800,
                memory_usage=60.0,
                efficiency=0.3,
                core_clock=1500,
                memory_clock=1000
            )
        except Exception as e:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit propagate.
            logger.error(f"Error collecting GPU metrics: {e}")
            return None

    @staticmethod
    def _summarize(values: List[float]) -> Dict[str, float]:
        """Return avg/max/min/current (last element) of a non-empty list."""
        return {
            "avg": sum(values) / len(values),
            "max": max(values),
            "min": min(values),
            "current": values[-1],
        }

    def get_performance_analytics(self, hours: int = 24) -> Dict[str, Any]:
        """Summarise the samples recorded during the last *hours* hours.

        Returns:
            ``{"error": ...}`` when no system sample falls inside the window;
            otherwise a dict with ``system`` and ``gpu`` statistics (``gpu``
            is empty when there are no GPU samples), the ``time_range`` and
            the number of ``data_points`` used.
        """
        cutoff_time = time.time() - (hours * 3600)

        # Snapshot first: the monitoring thread appends to and trims these
        # lists while this method may be running.
        system_samples = list(self.system_history)
        gpu_samples = list(self.gpu_history)

        # Filter recent data
        recent_system = [m for m in system_samples if m.timestamp >= cutoff_time]
        recent_gpu = [m for m in gpu_samples if m.timestamp >= cutoff_time]

        if not recent_system:
            return {"error": "No performance data available"}

        first, last = recent_system[0], recent_system[-1]
        # Network counters are cumulative, so traffic in the window is the
        # difference between the last and first sample.
        has_span = len(recent_system) > 1

        # Calculate system analytics
        system_analytics = {
            "cpu": self._summarize([m.cpu_percent for m in recent_system]),
            "memory": self._summarize([m.memory_percent for m in recent_system]),
            "disk": self._summarize([m.disk_percent for m in recent_system]),
            "network": {
                "total_sent": last.network_sent - first.network_sent if has_span else 0,
                "total_recv": last.network_recv - first.network_recv if has_span else 0
            },
            "processes": self._summarize([m.process_count for m in recent_system])
        }

        # Calculate GPU analytics if available
        gpu_analytics = {}
        if recent_gpu:
            gpu_analytics = {
                "temperature": self._summarize([m.temperature for m in recent_gpu]),
                "load": self._summarize([m.load for m in recent_gpu]),
                "power": self._summarize([m.power_draw for m in recent_gpu])
            }

        return {
            "system": system_analytics,
            "gpu": gpu_analytics,
            "time_range": {
                "start": cutoff_time,
                "end": time.time(),
                "hours": hours
            },
            "data_points": {
                "system": len(recent_system),
                "gpu": len(recent_gpu)
            }
        }

    def optimize_for_application(self, app_name: str) -> bool:
        """Apply the profile mapped to *app_name* (case-insensitive).

        Unknown application names fall back to the ``balanced`` profile.

        Returns:
            The result of ``apply_profile`` for the selected profile.
        """
        # Application-specific optimizations
        app_profiles = {
            "gaming": "gaming",
            "video_editing": "performance",
            "office": "power_saving",
            "browsing": "balanced",
            "development": "balanced"
        }

        profile_name = app_profiles.get(app_name.lower(), "balanced")
        return self.apply_profile(profile_name)

    def get_recommendations(self) -> List[Dict[str, Any]]:
        """Derive recommendations from the most recent system and GPU samples.

        Thresholds: CPU > 80 %, memory > 80 %, disk > 90 %, GPU temperature
        > 80. When nothing is flagged a single "optimal" info entry is
        returned; when no system sample exists a single info entry says so.
        """
        recommendations = []

        if not self.system_history:
            return [{"type": "info", "message": "No performance data available for recommendations"}]

        latest_metrics = self.system_history[-1]

        # CPU recommendations
        if latest_metrics.cpu_percent > 80:
            recommendations.append({
                "type": "warning",
                "category": "CPU",
                "message": "High CPU usage detected. Consider closing unnecessary applications or upgrading CPU.",
                "action": "Apply performance profile or reduce system load"
            })

        # Memory recommendations
        if latest_metrics.memory_percent > 80:
            recommendations.append({
                "type": "warning",
                "category": "Memory",
                "message": "High memory usage detected. Consider closing memory-intensive applications.",
                "action": "Close unnecessary applications or add more RAM"
            })

        # Disk recommendations
        if latest_metrics.disk_percent > 90:
            recommendations.append({
                "type": "critical",
                "category": "Disk",
                "message": "Disk space critically low. This may impact system performance.",
                "action": "Free up disk space immediately"
            })

        # GPU recommendations (if available)
        if self.gpu_history:
            latest_gpu = self.gpu_history[-1]
            if latest_gpu.temperature > 80:
                recommendations.append({
                    "type": "warning",
                    "category": "GPU",
                    "message": "High GPU temperature detected. Check cooling system.",
                    "action": "Apply performance fan curve or improve cooling"
                })

        # General recommendations
        if not recommendations:
            recommendations.append({
                "type": "info",
                "category": "System",
                "message": "System performance appears optimal.",
                "action": "Continue monitoring and maintain current settings"
            })

        return recommendations

    def save_performance_report(self, filename: Optional[str] = None) -> str:
        """Write a JSON report (24 h analytics + recommendations) under ``reports/``.

        Args:
            filename: Name of the report file; defaults to
                ``performance_report_<YYYYmmdd_HHMMSS>.json``.

        Returns:
            The path of the written report as a string.

        Raises:
            OSError: If the directory or file cannot be written.
        """
        if not filename:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"performance_report_{timestamp}.json"

        analytics = self.get_performance_analytics(24)
        recommendations = self.get_recommendations()

        report = {
            "timestamp": time.time(),
            "report_period": "24 hours",
            "analytics": analytics,
            "recommendations": recommendations,
            "current_profile": self.current_profile.name if self.current_profile else "unknown"
        }

        # Save report
        Path("reports").mkdir(exist_ok=True)
        report_path = Path("reports") / filename
        with open(report_path, 'w') as f:
            json.dump(report, f, indent=2, default=str)

        logger.info(f"Performance report saved to: {report_path}")
        return str(report_path)
class PerformanceAPI:
    """Thin, dict-returning facade over a ``SystemOptimizer``.

    Every method returns plain dictionaries/lists so results can be handed
    to a serialisation layer as-is.
    """

    def __init__(self, optimizer: "SystemOptimizer"):
        """Wrap *optimizer*; all calls are delegated to it."""
        self.optimizer = optimizer

    @staticmethod
    def _profile_to_dict(profile) -> Dict[str, Any]:
        """Serialise one profile into a plain dictionary.

        ``settings`` is returned as a (shallow) copy so callers that modify
        the result cannot silently change the optimizer's live profile.
        """
        settings = profile.settings
        return {
            "name": profile.name,
            "description": profile.description,
            "settings": dict(settings) if isinstance(settings, dict) else settings,
            "enabled": profile.enabled
        }

    def get_profiles(self) -> Dict[str, Any]:
        """Return all known profiles, keyed by profile name."""
        return {
            name: self._profile_to_dict(profile)
            for name, profile in self.optimizer.profiles.items()
        }

    def apply_profile(self, profile_name: str) -> Dict[str, Any]:
        """Apply *profile_name* and report the outcome as ``success``/``message``."""
        success = self.optimizer.apply_profile(profile_name)
        return {
            "success": success,
            "message": f"Applied profile: {profile_name}" if success else f"Failed to apply profile: {profile_name}"
        }

    def get_current_profile(self) -> Dict[str, Any]:
        """Return the active profile, or an ``error`` entry when none is set."""
        profile = self.optimizer.current_profile
        if profile:
            return self._profile_to_dict(profile)
        return {"error": "No profile currently applied"}

    def get_performance_analytics(self, hours: int = 24) -> Dict[str, Any]:
        """Return the optimizer's analytics for the last *hours* hours."""
        return self.optimizer.get_performance_analytics(hours)

    def get_recommendations(self) -> List[Dict[str, Any]]:
        """Return the optimizer's current recommendations."""
        return self.optimizer.get_recommendations()

    def optimize_for_application(self, app_name: str) -> Dict[str, Any]:
        """Optimize for *app_name* and report the outcome as ``success``/``message``."""
        success = self.optimizer.optimize_for_application(app_name)
        return {
            "success": success,
            "message": f"Optimized for {app_name}" if success else f"Failed to optimize for {app_name}"
        }

    def save_report(self, filename: Optional[str] = None) -> Dict[str, Any]:
        """Save a performance report.

        Returns:
            ``{"success": True, "report_path": ...}`` on success, or
            ``{"success": False, "error": ...}`` carrying the exception text
            when the optimizer raised.
        """
        try:
            report_path = self.optimizer.save_performance_report(filename)
        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }
        return {
            "success": True,
            "report_path": report_path
        }
if __name__ == "__main__":
    # Manual smoke test: list the profiles, apply one, sample for ten
    # seconds, then print the resulting analytics and recommendations.
    logging.basicConfig(level=logging.INFO)
    optimizer = SystemOptimizer()

    # Show what was loaded from the configuration.
    print("Available profiles:")
    for name in optimizer.profiles:
        profile = optimizer.profiles[name]
        print(f" {name}: {profile.description}")

    applied = optimizer.apply_profile("performance")
    if applied:
        print("Successfully applied performance profile")

    # Sample in the background; the finally clause guarantees the monitor
    # thread is stopped even if reporting fails or the run is interrupted.
    optimizer.start_monitoring()
    try:
        time.sleep(10)  # give the monitor time to collect some data

        analytics = optimizer.get_performance_analytics(1)
        print(f"Performance analytics: {analytics}")

        recommendations = optimizer.get_recommendations()
        print(f"Recommendations: {recommendations}")
    finally:
        optimizer.stop_monitoring()