# Source provenance: initial commit "LLM Inference Dashboard" (aefabf0, by jkottu)
"""Configuration settings for LLM Inference Dashboard."""
from dataclasses import dataclass, field
from typing import Optional
import os
@dataclass
class Config:
    """Dashboard configuration with sensible defaults.

    Every field can be overridden through an environment variable via
    :meth:`from_env`; unset variables fall back to the defaults below.
    """

    # vLLM Connection
    vllm_host: str = "localhost"      # hostname of the vLLM server
    vllm_port: int = 8000             # HTTP port of the vLLM server
    model_path: Optional[str] = None  # optional model identifier/path

    # Dashboard
    refresh_interval: float = 1.0     # seconds between metric polls
    history_length: int = 300         # samples kept: 5 minutes at 1s intervals

    # Database
    db_path: str = "data/metrics.db"  # SQLite file for persisted metrics

    # Alert Thresholds
    alert_kv_cache_threshold: float = 90.0    # percent KV-cache usage
    alert_gpu_memory_threshold: float = 95.0  # percent GPU memory usage
    alert_ttft_multiplier: float = 2.0        # alert when TTFT > baseline * this
    alert_throughput_drop_pct: float = 50.0   # alert on drop >= this percent

    # Webhooks
    slack_webhook: Optional[str] = None
    pagerduty_routing_key: Optional[str] = None
    generic_webhooks: list[str] = field(default_factory=list)  # extra webhook URLs

    # Load Testing Defaults
    loadtest_concurrent_users: int = 10
    loadtest_rps: float = 5.0
    loadtest_duration: int = 60  # seconds

    @property
    def metrics_endpoint(self) -> str:
        """Prometheus-style metrics URL exposed by vLLM."""
        return f"http://{self.vllm_host}:{self.vllm_port}/metrics"

    @property
    def openai_endpoint(self) -> str:
        """OpenAI-compatible API base URL."""
        return f"http://{self.vllm_host}:{self.vllm_port}/v1"

    @property
    def health_endpoint(self) -> str:
        """Liveness-check URL of the vLLM server."""
        return f"http://{self.vllm_host}:{self.vllm_port}/health"

    @classmethod
    def from_env(cls) -> "Config":
        """Create config from environment variables.

        Every field has a corresponding variable (UPPER_SNAKE_CASE of the
        field name, except VLLM_HOST/VLLM_PORT/MODEL_PATH/DB_PATH/
        SLACK_WEBHOOK/PAGERDUTY_KEY shown below); unset variables keep the
        dataclass default. GENERIC_WEBHOOKS is a comma-separated URL list;
        blank entries are ignored.

        Raises:
            ValueError: if a numeric variable holds a non-numeric string.
        """
        # Comma-separated list; tolerate stray whitespace and trailing commas.
        webhooks = [
            url.strip()
            for url in os.getenv("GENERIC_WEBHOOKS", "").split(",")
            if url.strip()
        ]
        return cls(
            vllm_host=os.getenv("VLLM_HOST", "localhost"),
            vllm_port=int(os.getenv("VLLM_PORT", "8000")),
            model_path=os.getenv("MODEL_PATH"),
            refresh_interval=float(os.getenv("REFRESH_INTERVAL", "1.0")),
            history_length=int(os.getenv("HISTORY_LENGTH", "300")),
            db_path=os.getenv("DB_PATH", "data/metrics.db"),
            alert_kv_cache_threshold=float(
                os.getenv("ALERT_KV_CACHE_THRESHOLD", "90.0")
            ),
            alert_gpu_memory_threshold=float(
                os.getenv("ALERT_GPU_MEMORY_THRESHOLD", "95.0")
            ),
            alert_ttft_multiplier=float(os.getenv("ALERT_TTFT_MULTIPLIER", "2.0")),
            alert_throughput_drop_pct=float(
                os.getenv("ALERT_THROUGHPUT_DROP_PCT", "50.0")
            ),
            slack_webhook=os.getenv("SLACK_WEBHOOK"),
            pagerduty_routing_key=os.getenv("PAGERDUTY_KEY"),
            generic_webhooks=webhooks,
            loadtest_concurrent_users=int(
                os.getenv("LOADTEST_CONCURRENT_USERS", "10")
            ),
            loadtest_rps=float(os.getenv("LOADTEST_RPS", "5.0")),
            loadtest_duration=int(os.getenv("LOADTEST_DURATION", "60")),
        )
# Global config instance
# NOTE: built once at import time from os.environ (import-time side effect);
# later changes to the environment are not reflected here.
config = Config.from_env()