ollive-api / evaluation /metrics.py
Karthik Namboori
Deploy ollive FastAPI Docker Space
7b4b748
import logging
from dataclasses import dataclass, field
from assistants.base import AssistantResponse, BaseAssistant
logger = logging.getLogger(__name__)
@dataclass
class TurnRecord:
user: str
assistant: str
latency_ms: float
model: str
error: str | None = None
@dataclass
class SessionMetrics:
    """Aggregate statistics over the turns of one named chat session.

    Attributes:
        name: Human-readable label of the session, shown in ``summary()``.
        turns: Recorded turns in the order they were appended.
    """

    name: str
    turns: list[TurnRecord] = field(default_factory=list)

    @property
    def total_turns(self) -> int:
        """Return the number of recorded turns."""
        return len(self.turns)

    @property
    def successful_turns(self) -> int:
        """Return the number of turns whose ``error`` is ``None``."""
        return sum(1 for t in self.turns if t.error is None)

    @property
    def error_rate(self) -> float:
        """Return the fraction of failed turns, or 0.0 for an empty session."""
        total = len(self.turns)
        if not total:
            return 0.0
        # Divide the failure count directly instead of computing
        # ``1.0 - successes / total``: the subtraction amplifies rounding
        # error (1 failure in 10 would yield 0.09999999999999998, not 0.1).
        return (total - self.successful_turns) / total

    @property
    def avg_latency_ms(self) -> float:
        """Return the mean latency over all turns, or 0.0 for an empty session.

        Failed turns are included in the average.
        """
        if not self.turns:
            return 0.0
        return sum(t.latency_ms for t in self.turns) / len(self.turns)

    def record(self, user: str, response: AssistantResponse) -> None:
        """Append one turn built from the user input and the assistant response.

        Args:
            user: The text the user sent.
            response: The assistant's response; its ``text``, ``latency_ms``,
                ``model`` and ``error`` attributes are copied into the record.
        """
        self.turns.append(
            TurnRecord(
                user=user,
                assistant=response.text,
                latency_ms=response.latency_ms,
                model=response.model,
                error=response.error,
            )
        )

    def summary(self) -> str:
        """Return a Markdown summary of the session.

        The model shown is the one from the most recent turn, or ``n/a``
        when no turn has been recorded yet.
        """
        return (
            f"**{self.name}**\n"
            f"- Model: `{self.turns[-1].model if self.turns else 'n/a'}`\n"
            f"- Turns: {self.total_turns}\n"
            f"- Success rate: {self.successful_turns}/{self.total_turns}\n"
            f"- Avg latency: {self.avg_latency_ms:.0f} ms\n"
            f"- Error rate: {self.error_rate:.0%}"
        )
class TrackedAssistant:
    """Decorator-style wrapper that logs every chat turn into session metrics."""

    def __init__(self, assistant: BaseAssistant, name: str) -> None:
        """Wrap ``assistant`` and start an empty metrics session called ``name``."""
        self.metrics = SessionMetrics(name=name)
        self.assistant = assistant

    def chat(self, user_input: str) -> AssistantResponse:
        """Forward ``user_input`` to the wrapped assistant and record the turn.

        Returns the wrapped assistant's response unchanged. The turn is
        recorded only after the wrapped ``chat`` call has returned.
        """
        reply = self.assistant.chat(user_input)
        self.metrics.record(user_input, reply)
        return reply

    def reset(self) -> None:
        """Reset the wrapped assistant and start a fresh metrics session.

        The new session keeps the current session's name.
        """
        self.assistant.reset()
        session_name = self.metrics.name
        self.metrics = SessionMetrics(name=session_name)