Spaces:
Running
Running
File size: 1,961 Bytes
7b4b748 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | from __future__ import annotations
from pydantic import BaseModel, Field
class ChatRequest(BaseModel):
    # Inbound chat payload: the user's text plus the session it belongs to.

    # Required (no default); length must be between 1 and 4000 characters.
    message: str = Field(
        ...,
        min_length=1,
        max_length=4000,
    )
    # Falls back to the literal "default" when omitted; 1 to 128 characters.
    session_id: str = Field(
        "default",
        min_length=1,
        max_length=128,
    )
class ChatResponse(BaseModel):
    # Outbound chat payload. The first six fields are required; the two
    # list fields start out as fresh empty lists when not supplied.

    session_id: str
    response: str
    model: str
    latency_ms: float  # presumably milliseconds, going by the name
    estimated_tokens: int
    estimated_cost_usd: float  # presumably US dollars, going by the name

    # default_factory gives every instance its own empty list.
    tools_used: list[str] = Field(default_factory=list)
    guardrail_blocks: list[str] = Field(default_factory=list)
class SessionResetResponse(BaseModel):
    # Result of a session reset: which session, and a boolean outcome flag.
    # Both fields are required.

    session_id: str
    cleared: bool
class HealthResponse(BaseModel):
    # Health-check payload; all three fields are required.

    status: str
    model_id: str
    sessions: int  # presumably a count of sessions -- confirm with the producer
class CostPricing(BaseModel):
    # Pricing parameters; every field is required. Units are only implied
    # by the field names (USD, per hour, per character) -- confirm upstream.

    cpu_hour_usd: float
    tokens_per_char: float
    token_proxy_usd_per_char: float
    deployment: str
class MetricsResponse(BaseModel):
    # Cost/usage metrics payload; all fields are required.

    pricing: CostPricing  # nested pricing model
    api_cost_consumed_usd: float
    total_requests: int
    estimate_tooltip: str
class InferenceMetricsResponse(BaseModel):
    # Inference statistics payload.

    samples: int

    # Optional measurements: each may be None and defaults to None.
    # (ttft / tbt presumably mean time-to-first-token and
    # time-between-tokens -- confirm with the producer.)
    ttft_ms: float | None = None
    tbt_ms: float | None = None
    tokens_per_sec: float | None = None

    # Required aggregates.
    latency_p50_ms: float
    latency_p95_ms: float
    avg_input_tokens: float
    avg_output_tokens: float
class TracesListResponse(BaseModel):
    # Trace listing payload. Both fields are required and use untyped
    # dicts, so key/value schemas are not enforced at this layer.

    stats: dict
    traces: list[dict]
class TraceDetailResponse(BaseModel):
    # Single-trace payload: an identifier plus a list of untyped span dicts.
    # Both fields are required.

    trace_id: str
    spans: list[dict]
class EvalRunRequest(BaseModel):
    # Parameters for an evaluation run; every field has a default.

    # Defaults to 10; validated to lie in the inclusive range 1..20.
    benchmark_samples: int = Field(10, ge=1, le=20)
    # Defaults to 42; no extra constraints.
    seed: int = 42
    # Factory builds a new ["oss"] list for each instance.
    assistants: list[str] = Field(
        default_factory=lambda: ["oss"],
    )
class EvalMetricScore(BaseModel):
    # One metric's score; all four fields are required.

    metric: str
    label: str
    percent: float  # no range constraint is enforced here
    total: int
class EvalAssistantResult(BaseModel):
    # Per-assistant evaluation result; all fields are required.

    assistant: str
    model_id: str
    metrics: list[EvalMetricScore]  # one entry per scored metric
class EvalRunResponse(BaseModel):
    # Evaluation-run payload; all fields are required.

    generated_at: str  # plain string; no timestamp format is enforced here
    judge_model: str
    results: list[EvalAssistantResult]
    markdown_report: str
|