"""Configuration settings for LLM Inference Dashboard."""
from dataclasses import dataclass, field
from typing import Optional
import os
@dataclass
class Config:
    """Dashboard configuration with sensible defaults.

    All fields can be overridden via constructor keyword arguments, or
    loaded from environment variables with :meth:`from_env`.
    """

    # vLLM connection
    vllm_host: str = "localhost"
    vllm_port: int = 8000
    model_path: Optional[str] = None

    # Dashboard refresh behavior
    refresh_interval: float = 1.0  # seconds between metric polls
    history_length: int = 300  # 5 minutes at 1s intervals

    # Local metrics store
    db_path: str = "data/metrics.db"

    # Alert thresholds
    alert_kv_cache_threshold: float = 90.0  # percent
    alert_gpu_memory_threshold: float = 95.0  # percent
    alert_ttft_multiplier: float = 2.0  # alert when TTFT exceeds baseline * multiplier
    alert_throughput_drop_pct: float = 50.0  # percent drop that triggers an alert

    # Webhook targets for alert delivery
    slack_webhook: Optional[str] = None
    pagerduty_routing_key: Optional[str] = None
    # Mutable default needs a factory so instances don't share one list.
    generic_webhooks: list = field(default_factory=list)

    # Load-testing defaults
    loadtest_concurrent_users: int = 10
    loadtest_rps: float = 5.0
    loadtest_duration: int = 60

    def metrics_endpoint(self) -> str:
        """Return the Prometheus-style metrics URL of the vLLM server."""
        return f"http://{self.vllm_host}:{self.vllm_port}/metrics"

    def openai_endpoint(self) -> str:
        """Return the OpenAI-compatible API base URL of the vLLM server."""
        return f"http://{self.vllm_host}:{self.vllm_port}/v1"

    def health_endpoint(self) -> str:
        """Return the health-check URL of the vLLM server."""
        return f"http://{self.vllm_host}:{self.vllm_port}/health"

    @classmethod
    def from_env(cls) -> "Config":
        """Create config from environment variables.

        Unset variables fall back to the class defaults. Numeric
        variables are parsed, so a malformed value raises ValueError.
        """
        return cls(
            vllm_host=os.getenv("VLLM_HOST", "localhost"),
            vllm_port=int(os.getenv("VLLM_PORT", "8000")),
            model_path=os.getenv("MODEL_PATH"),
            refresh_interval=float(os.getenv("REFRESH_INTERVAL", "1.0")),
            db_path=os.getenv("DB_PATH", "data/metrics.db"),
            slack_webhook=os.getenv("SLACK_WEBHOOK"),
            pagerduty_routing_key=os.getenv("PAGERDUTY_KEY"),
        )
# Module-level singleton, populated from the environment at import time.
config: Config = Config.from_env()