# Source provenance: initial commit "LLM Inference Dashboard" (aefabf0, by jkottu)
"""Configuration settings for LLM Inference Dashboard."""
from dataclasses import dataclass, field
from typing import Optional
import os
@dataclass
class Config:
    """Dashboard configuration with sensible defaults.

    Every field can be overridden through an environment variable via
    :meth:`from_env`; unset variables fall back to the defaults below.
    """

    # vLLM Connection
    vllm_host: str = "localhost"      # hostname of the vLLM server
    vllm_port: int = 8000             # HTTP port of the vLLM server
    model_path: Optional[str] = None  # optional model identifier/path

    # Dashboard
    refresh_interval: float = 1.0     # seconds between metric polls
    history_length: int = 300         # samples kept: 5 minutes at 1s intervals

    # Database
    db_path: str = "data/metrics.db"  # SQLite file for persisted metrics

    # Alert Thresholds
    alert_kv_cache_threshold: float = 90.0    # percent KV-cache usage
    alert_gpu_memory_threshold: float = 95.0  # percent GPU memory usage
    alert_ttft_multiplier: float = 2.0        # alert when TTFT > baseline * this
    alert_throughput_drop_pct: float = 50.0   # alert on drop >= this percent

    # Webhooks
    slack_webhook: Optional[str] = None
    pagerduty_routing_key: Optional[str] = None
    generic_webhooks: list[str] = field(default_factory=list)  # extra webhook URLs

    # Load Testing Defaults
    loadtest_concurrent_users: int = 10
    loadtest_rps: float = 5.0
    loadtest_duration: int = 60  # seconds

    @property
    def metrics_endpoint(self) -> str:
        """Prometheus-style metrics URL exposed by vLLM."""
        return f"http://{self.vllm_host}:{self.vllm_port}/metrics"

    @property
    def openai_endpoint(self) -> str:
        """OpenAI-compatible API base URL."""
        return f"http://{self.vllm_host}:{self.vllm_port}/v1"

    @property
    def health_endpoint(self) -> str:
        """Liveness-check URL of the vLLM server."""
        return f"http://{self.vllm_host}:{self.vllm_port}/health"

    @classmethod
    def from_env(cls) -> "Config":
        """Create config from environment variables.

        Every field has a corresponding variable (UPPER_SNAKE_CASE of the
        field name, except VLLM_HOST/VLLM_PORT/MODEL_PATH/DB_PATH/
        SLACK_WEBHOOK/PAGERDUTY_KEY shown below); unset variables keep the
        dataclass default. GENERIC_WEBHOOKS is a comma-separated URL list;
        blank entries are ignored.

        Raises:
            ValueError: if a numeric variable holds a non-numeric string.
        """
        # Comma-separated list; tolerate stray whitespace and trailing commas.
        webhooks = [
            url.strip()
            for url in os.getenv("GENERIC_WEBHOOKS", "").split(",")
            if url.strip()
        ]
        return cls(
            vllm_host=os.getenv("VLLM_HOST", "localhost"),
            vllm_port=int(os.getenv("VLLM_PORT", "8000")),
            model_path=os.getenv("MODEL_PATH"),
            refresh_interval=float(os.getenv("REFRESH_INTERVAL", "1.0")),
            history_length=int(os.getenv("HISTORY_LENGTH", "300")),
            db_path=os.getenv("DB_PATH", "data/metrics.db"),
            alert_kv_cache_threshold=float(
                os.getenv("ALERT_KV_CACHE_THRESHOLD", "90.0")
            ),
            alert_gpu_memory_threshold=float(
                os.getenv("ALERT_GPU_MEMORY_THRESHOLD", "95.0")
            ),
            alert_ttft_multiplier=float(os.getenv("ALERT_TTFT_MULTIPLIER", "2.0")),
            alert_throughput_drop_pct=float(
                os.getenv("ALERT_THROUGHPUT_DROP_PCT", "50.0")
            ),
            slack_webhook=os.getenv("SLACK_WEBHOOK"),
            pagerduty_routing_key=os.getenv("PAGERDUTY_KEY"),
            generic_webhooks=webhooks,
            loadtest_concurrent_users=int(
                os.getenv("LOADTEST_CONCURRENT_USERS", "10")
            ),
            loadtest_rps=float(os.getenv("LOADTEST_RPS", "5.0")),
            loadtest_duration=int(os.getenv("LOADTEST_DURATION", "60")),
        )
# Global config instance
# NOTE: built once at import time from os.environ (import-time side effect);
# later changes to the environment are not reflected here.
config = Config.from_env()