Spaces:
Sleeping
Sleeping
| """Data models for storage layer.""" | |
| from dataclasses import dataclass, field | |
| from datetime import datetime | |
| from typing import Optional, Dict, Any | |
| import json | |
@dataclass
class MetricRecord:
    """A single metric record for storage.

    Fields mirror the columns of the metrics table; ``from_row`` expects
    rows ordered ``(id, timestamp, metric_name, value, labels_json)``.
    """

    metric_name: str
    value: float
    # Naive local time by default; serialized as ISO-8601 text.
    timestamp: datetime = field(default_factory=datetime.now)
    labels: Dict[str, str] = field(default_factory=dict)
    # Database primary key; None until the record has been inserted.
    id: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (timestamp as ISO string)."""
        return {
            "id": self.id,
            "metric_name": self.metric_name,
            "value": self.value,
            "timestamp": self.timestamp.isoformat(),
            "labels": self.labels,
        }

    @classmethod
    def from_row(cls, row: tuple) -> "MetricRecord":
        """Rebuild a record from a DB row.

        Expected column order: (id, timestamp, metric_name, value,
        labels_json). A NULL/empty labels column yields an empty dict.
        """
        return cls(
            id=row[0],
            timestamp=datetime.fromisoformat(row[1]),
            metric_name=row[2],
            value=row[3],
            labels=json.loads(row[4]) if row[4] else {},
        )
@dataclass
class AlertRecord:
    """An alert record for storage.

    Captures the firing rule, the offending metric value versus its
    threshold, and an optional resolution timestamp.
    """

    rule_name: str
    severity: str
    metric_name: str
    value: float
    threshold: float
    message: str
    timestamp: datetime = field(default_factory=datetime.now)
    # None while the alert is still active.
    resolved_at: Optional[datetime] = None
    # Database primary key; None until the record has been inserted.
    id: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (datetimes as ISO strings)."""
        return {
            "id": self.id,
            "rule_name": self.rule_name,
            "severity": self.severity,
            "metric_name": self.metric_name,
            "value": self.value,
            "threshold": self.threshold,
            "message": self.message,
            "timestamp": self.timestamp.isoformat(),
            "resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
        }

    @classmethod
    def from_row(cls, row: tuple) -> "AlertRecord":
        """Rebuild a record from a DB row.

        Expected column order: (id, timestamp, rule_name, severity,
        metric_name, value, threshold, message, resolved_at). Trailing
        columns may be absent in short rows (legacy schema tolerance).
        """
        return cls(
            id=row[0],
            timestamp=datetime.fromisoformat(row[1]),
            rule_name=row[2],
            severity=row[3],
            metric_name=row[4],
            value=row[5],
            threshold=row[6],
            message=row[7] if len(row) > 7 else "",
            resolved_at=datetime.fromisoformat(row[8]) if len(row) > 8 and row[8] else None,
        )
@dataclass
class RequestTrace:
    """A request trace for latency analysis.

    Breaks a single inference request into queue/prefill/decode phases
    (all durations in milliseconds).
    """

    request_id: str
    prompt_tokens: int
    output_tokens: int
    queue_time_ms: float
    prefill_time_ms: float
    decode_time_ms: float
    total_time_ms: float
    tokens_per_second: float
    # GPU memory readings (units not established here — presumably GB;
    # TODO(review): confirm against the producer of these traces).
    gpu_memory_at_start: float = 0.0
    gpu_memory_at_end: float = 0.0
    # Flagged by an upstream slow-request classifier.
    is_slow: bool = False
    timestamp: datetime = field(default_factory=datetime.now)
    # Database primary key; None until the record has been inserted.
    id: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict.

        Durations and throughput are rounded to 2 decimal places; note
        that the GPU memory fields are intentionally not included.
        """
        return {
            "id": self.id,
            "request_id": self.request_id,
            "timestamp": self.timestamp.isoformat(),
            "prompt_tokens": self.prompt_tokens,
            "output_tokens": self.output_tokens,
            "queue_time_ms": round(self.queue_time_ms, 2),
            "prefill_time_ms": round(self.prefill_time_ms, 2),
            "decode_time_ms": round(self.decode_time_ms, 2),
            "total_time_ms": round(self.total_time_ms, 2),
            "tokens_per_second": round(self.tokens_per_second, 2),
            "is_slow": self.is_slow,
        }

    @classmethod
    def from_row(cls, row: tuple) -> "RequestTrace":
        """Rebuild a trace from a DB row.

        Expected column order: (id, request_id, timestamp, prompt_tokens,
        output_tokens, queue_time_ms, prefill_time_ms, decode_time_ms,
        total_time_ms, tokens_per_second, is_slow). The last two columns
        may be absent in short rows and fall back to 0 / False.
        """
        return cls(
            id=row[0],
            request_id=row[1],
            timestamp=datetime.fromisoformat(row[2]),
            prompt_tokens=row[3],
            output_tokens=row[4],
            queue_time_ms=row[5],
            prefill_time_ms=row[6],
            decode_time_ms=row[7],
            total_time_ms=row[8],
            tokens_per_second=row[9] if len(row) > 9 else 0,
            is_slow=bool(row[10]) if len(row) > 10 else False,
        )
@dataclass
class LoadTestResult:
    """Results from a load test run.

    Aggregated latency percentiles and throughput for a single test
    configuration against one endpoint.
    """

    test_id: str
    target_endpoint: str
    concurrent_users: int
    requests_per_second: float
    duration_seconds: int
    total_requests: int
    successful_requests: int
    failed_requests: int
    avg_latency_ms: float
    p50_latency_ms: float
    p95_latency_ms: float
    p99_latency_ms: float
    throughput_rps: float
    # Load level at which the service saturated; None if not reached.
    saturation_point: Optional[float] = None
    timestamp: datetime = field(default_factory=datetime.now)
    # Database primary key; None until the record has been inserted.
    id: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict.

        Latency/throughput values are rounded to 2 decimal places.
        NOTE(review): unlike the sibling record classes, this dict does
        not include "id" — looks intentional, but worth confirming.
        """
        return {
            "test_id": self.test_id,
            "target_endpoint": self.target_endpoint,
            "concurrent_users": self.concurrent_users,
            "requests_per_second": self.requests_per_second,
            "duration_seconds": self.duration_seconds,
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "avg_latency_ms": round(self.avg_latency_ms, 2),
            "p50_latency_ms": round(self.p50_latency_ms, 2),
            "p95_latency_ms": round(self.p95_latency_ms, 2),
            "p99_latency_ms": round(self.p99_latency_ms, 2),
            "throughput_rps": round(self.throughput_rps, 2),
            "saturation_point": self.saturation_point,
            "timestamp": self.timestamp.isoformat(),
        }