# File size: 1,961 Bytes
# 7b4b748

from __future__ import annotations

from pydantic import BaseModel, Field


class ChatRequest(BaseModel):
    # Input schema: one text message plus the id of the session it belongs to.
    # NOTE(review): presumably the body of the chat endpoint — confirm
    # against the router that consumes this model.

    # Required (`...` marks "no default"); must be 1-4000 characters long.
    message: str = Field(..., min_length=1, max_length=4000)
    # Optional; falls back to the literal "default" when omitted. A supplied
    # value must be 1-128 characters long.
    session_id: str = Field(default="default", min_length=1, max_length=128)


class ChatResponse(BaseModel):
    # Output schema for one reply: the response text plus bookkeeping
    # (session, model name, timing, token/cost estimates, tool and guardrail
    # activity). How each value is computed is not visible in this module.
    session_id: str
    response: str
    model: str
    # Units are read off the field names (milliseconds, USD) — TODO confirm
    # against the code that populates them.
    latency_ms: float
    estimated_tokens: int
    estimated_cost_usd: float
    # Both lists are optional; default_factory gives every instance its own
    # fresh empty list instead of a shared mutable default.
    tools_used: list[str] = Field(default_factory=list)
    guardrail_blocks: list[str] = Field(default_factory=list)


class SessionResetResponse(BaseModel):
    # Output schema pairing a session id with a boolean `cleared` flag.
    # Both fields are required.
    # NOTE(review): presumably `cleared` reports whether stored session state
    # was actually removed — verify against the reset handler.
    session_id: str
    cleared: bool


class HealthResponse(BaseModel):
    # Output schema for a status report: a status string, a model identifier
    # and an integer `sessions` value. All fields are required.
    # NOTE(review): `sessions` is presumably a count of live sessions —
    # confirm against the code that builds this response.
    status: str
    # NOTE(review): depending on the installed pydantic v2 release, a field
    # name starting with `model_` can trigger a protected-namespace warning;
    # check the pinned version before renaming or silencing it.
    model_id: str
    sessions: int


class CostPricing(BaseModel):
    # Pricing parameters; used as the type of `MetricsResponse.pricing`.
    # All fields are required.
    # NOTE(review): the names suggest a USD price per CPU hour, a
    # tokens-per-character ratio and a USD-per-character proxy rate —
    # units and meaning are not established in this module; confirm with
    # the cost-estimation code.
    cpu_hour_usd: float
    tokens_per_char: float
    token_proxy_usd_per_char: float
    # Free-form string; presumably names the deployment the prices apply to.
    deployment: str


class MetricsResponse(BaseModel):
    # Output schema for cost metrics: the pricing parameters, a consumed-cost
    # figure, a request count and a tooltip string. All fields are required.
    # Nested CostPricing model (declared earlier in this module).
    pricing: CostPricing
    # Presumably a running total in USD, per the name — TODO confirm.
    api_cost_consumed_usd: float
    total_requests: int
    # Presumably UI help text explaining how the estimate is derived —
    # verify against the frontend that renders it.
    estimate_tooltip: str


class InferenceMetricsResponse(BaseModel):
    # Output schema for aggregated inference statistics.
    # `samples` is required; presumably the number of requests aggregated.
    samples: int
    # The next three fields are optional and default to None, so a response
    # can be built without them.
    # NOTE(review): `ttft`/`tbt` presumably stand for time-to-first-token and
    # time-between-tokens, in milliseconds — confirm with the metrics code.
    ttft_ms: float | None = None
    tbt_ms: float | None = None
    tokens_per_sec: float | None = None
    # Required. Names suggest 50th/95th percentile latency in milliseconds
    # and mean token counts — TODO confirm.
    latency_p50_ms: float
    latency_p95_ms: float
    avg_input_tokens: float
    avg_output_tokens: float


class TracesListResponse(BaseModel):
    # Output schema for a trace listing: one `stats` mapping plus a list of
    # trace mappings. Both fields are required.
    # The bare `dict` annotations put no constraint on keys or values, so the
    # shape of these payloads is defined by whatever code produces them, not
    # by this model.
    stats: dict
    traces: list[dict]


class TraceDetailResponse(BaseModel):
    # Output schema for a single trace: its id and a list of span mappings.
    # Both fields are required.
    trace_id: str
    # Each span is an unconstrained `dict`; its keys are not specified in
    # this module.
    spans: list[dict]


class EvalRunRequest(BaseModel):
    # Input schema for starting an evaluation run. Every field has a default,
    # so an empty payload is valid.
    # Defaults to 10; a supplied value must satisfy 1 <= value <= 20.
    benchmark_samples: int = Field(default=10, ge=1, le=20)
    # Defaults to 42. Presumably a random seed for reproducible sampling —
    # confirm against the eval runner.
    seed: int = Field(default=42)
    # Defaults to ["oss"]; the lambda builds a new list per instance so the
    # default is never shared between requests.
    assistants: list[str] = Field(default_factory=lambda: ["oss"])


class EvalMetricScore(BaseModel):
    # One metric's score; used as the element type of
    # `EvalAssistantResult.metrics`. All fields are required.
    # NOTE(review): presumably `metric` is a machine key and `label` its
    # display name, `percent` a 0-100 score and `total` the number of scored
    # items — none of this is enforced here; confirm with the eval code.
    metric: str
    label: str
    percent: float
    total: int


class EvalAssistantResult(BaseModel):
    # Evaluation outcome for one assistant: its name, a model identifier and
    # a list of per-metric scores. All fields are required.
    assistant: str
    # NOTE(review): depending on the installed pydantic v2 release, a field
    # name starting with `model_` can trigger a protected-namespace warning;
    # check the pinned version before renaming or silencing it.
    model_id: str
    # Nested EvalMetricScore models (declared earlier in this module).
    metrics: list[EvalMetricScore]


class EvalRunResponse(BaseModel):
    # Output schema for a completed evaluation run. All fields are required.
    # Plain string, not a datetime: the timestamp format is whatever the
    # producer writes (presumably ISO 8601 — TODO confirm).
    generated_at: str
    # Presumably the name of the model used to grade answers — verify
    # against the eval runner.
    judge_model: str
    # One EvalAssistantResult (declared earlier in this module) per entry.
    results: list[EvalAssistantResult]
    # Presumably a pre-rendered Markdown summary of `results`.
    markdown_report: str