Spaces:
Running
Running
| from __future__ import annotations | |
| from pydantic import BaseModel, Field | |
class ChatRequest(BaseModel):
    """Schema for a chat request: a message plus a session identifier.

    Both fields are length-constrained strings. ``session_id`` falls back to
    ``"default"`` when it is not supplied.
    """

    # Required; must be 1 to 4000 characters long.
    message: str = Field(
        ...,
        min_length=1,
        max_length=4000,
    )
    # Optional; 1 to 128 characters, "default" when omitted.
    session_id: str = Field(
        "default",
        min_length=1,
        max_length=128,
    )
class ChatResponse(BaseModel):
    """Schema for a chat reply together with per-request bookkeeping fields.

    Every field is required except ``tools_used`` and ``guardrail_blocks``,
    which each default to a new empty list.
    """

    session_id: str
    response: str
    model: str
    # NOTE(review): units below are implied by the field names only
    # (milliseconds, token count, US dollars) - confirm against the producer.
    latency_ms: float
    estimated_tokens: int
    estimated_cost_usd: float
    # default_factory gives every instance its own list object.
    tools_used: list[str] = Field(default_factory=list)
    guardrail_blocks: list[str] = Field(default_factory=list)
class SessionResetResponse(BaseModel):
    """Schema pairing a session identifier with a boolean ``cleared`` flag.

    Both fields are required.
    """

    session_id: str
    # NOTE(review): presumably True when the session state was removed;
    # the exact meaning is set by the caller that builds this model.
    cleared: bool
class HealthResponse(BaseModel):
    """Schema for a health report: status text, model id and a session count.

    All three fields are required.
    """

    status: str
    model_id: str
    # NOTE(review): presumably the number of sessions currently tracked -
    # confirm with the code that populates it.
    sessions: int
class CostPricing(BaseModel):
    """Schema holding the pricing parameters; all fields are required.

    Three numeric rates plus a ``deployment`` string.
    """

    # NOTE(review): meanings are inferred from the names only (USD per CPU
    # hour, tokens per character, USD per character) - confirm with the
    # cost-estimation code.
    cpu_hour_usd: float
    tokens_per_char: float
    token_proxy_usd_per_char: float
    deployment: str
class MetricsResponse(BaseModel):
    """Schema for a metrics report that embeds a ``CostPricing`` model.

    All fields are required.
    """

    # Nested model; validated as a CostPricing instance.
    pricing: CostPricing
    api_cost_consumed_usd: float
    total_requests: int
    # NOTE(review): presumably explanatory text shown next to the estimate
    # in a UI - confirm with the consumer.
    estimate_tooltip: str
class InferenceMetricsResponse(BaseModel):
    """Schema for aggregated inference measurements.

    ``ttft_ms``, ``tbt_ms`` and ``tokens_per_sec`` are optional and default to
    ``None``; every other field is required.
    """

    samples: int
    # NOTE(review): "ttft" / "tbt" presumably stand for time-to-first-token
    # and time-between-tokens - confirm with the metrics collector.
    ttft_ms: float | None = None
    tbt_ms: float | None = None
    tokens_per_sec: float | None = None
    # NOTE(review): presumably the 50th / 95th latency percentiles in ms.
    latency_p50_ms: float
    latency_p95_ms: float
    avg_input_tokens: float
    avg_output_tokens: float
class TracesListResponse(BaseModel):
    """Schema carrying a ``stats`` mapping and a list of trace mappings.

    Both fields are required. Neither dict type constrains its keys or values,
    so their inner structure is not validated by this model.
    """

    stats: dict
    traces: list[dict]
class TraceDetailResponse(BaseModel):
    """Schema for one trace: its identifier and a list of span mappings.

    Both fields are required. The span dicts are unconstrained, so their inner
    structure is not validated by this model.
    """

    trace_id: str
    spans: list[dict]
class EvalRunRequest(BaseModel):
    """Schema for the parameters of an evaluation run.

    Every field has a default, so an empty payload is accepted.
    """

    # Integer between 1 and 20 inclusive; 10 when omitted.
    benchmark_samples: int = Field(
        10,
        ge=1,
        le=20,
    )
    # 42 when omitted.
    seed: int = Field(42)
    # The factory builds a new ["oss"] list for each instance.
    assistants: list[str] = Field(
        default_factory=lambda: ["oss"],
    )
class EvalMetricScore(BaseModel):
    """Schema for a single metric score; all four fields are required."""

    metric: str
    label: str
    # NOTE(review): scale (0-100 vs 0-1) is not established here - confirm
    # with the code that computes it.
    percent: float
    total: int
class EvalAssistantResult(BaseModel):
    """Schema grouping a list of ``EvalMetricScore`` entries for one assistant.

    All fields are required.
    """

    assistant: str
    model_id: str
    # Each element is validated as an EvalMetricScore.
    metrics: list[EvalMetricScore]
class EvalRunResponse(BaseModel):
    """Schema for the outcome of an evaluation run; all fields are required.

    Holds one ``EvalAssistantResult`` per entry in ``results`` plus a report
    string.
    """

    # Plain string; NOTE(review): timestamp format is not enforced here.
    generated_at: str
    judge_model: str
    results: list[EvalAssistantResult]
    # NOTE(review): presumably Markdown text, per the field name.
    markdown_report: str