# hhh/custom_types.py
from typing import Literal, Optional, TypedDict, Union

class Utterance(TypedDict):
    """A single utterance (one speaker turn) in a conversation."""
    speaker: str
    text: str

# Score types (mutually exclusive, discriminated by the "type" field)
class CategoricalScore(TypedDict):
    """Categorical evaluation: a label only."""
    type: Literal["categorical"]
    label: str                   # e.g., "High", "Change", "Positive"
    confidence: Optional[float]  # 0-1 confidence, or None if unavailable

class NumericalScore(TypedDict):
    """Numerical evaluation: a score together with its maximum value."""
    type: Literal["numerical"]
    value: float          # e.g., 3.0, 0.85, 8.5
    max_value: float      # e.g., 5.0, 1.0, 10.0
    label: Optional[str]  # derived label such as "High" above a threshold; None otherwise
# Union type for metric scores
MetricScore = Union[CategoricalScore, NumericalScore]
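
# A minimal sketch (not part of the original module) of how a MetricScore
# is consumed: TypedDicts are plain dicts at runtime, so isinstance() cannot
# distinguish the two score types; branch on the Literal "type" field instead,
# which static type checkers can also use to narrow the union. The
# format_score helper name is hypothetical.
def format_score(score: MetricScore) -> str:
    """Render a MetricScore as a short human-readable string."""
    if score["type"] == "categorical":
        # Narrowed to CategoricalScore in this branch.
        return score["label"]
    # Otherwise this is a NumericalScore.
    return f"{score['value']}/{score['max_value']}"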

# Evaluation result structures
class UtteranceScore(TypedDict):
    """Per-utterance evaluation result."""
    index: int                       # Index into the original conversation
    metrics: dict[str, MetricScore]  # e.g., {"talk_type": {...}, "empathy_er": {...}}

class SegmentScore(TypedDict):
    """Multi-utterance segment evaluation result."""
    utterance_indices: list[int]     # Indices of the utterances this segment covers
    metrics: dict[str, MetricScore]  # Aggregate metrics for this segment
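
# Illustrative example (values invented for demonstration, not in the original
# module): a SegmentScore covering utterances 0-2 with a single aggregate
# numerical metric, reusing the "empathy_er" metric name from the comments above.
_example_segment: SegmentScore = {
    "utterance_indices": [0, 1, 2],
    "metrics": {
        "empathy_er": {
            "type": "numerical",
            "value": 4.0,
            "max_value": 5.0,
            "label": "High",
        }
    },
}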

class EvaluationResult(TypedDict):
    """
    Unified evaluation result format.

    Based on granularity, exactly one of overall / per_utterance /
    per_segment is populated:
    - granularity="utterance":    per_utterance has data
    - granularity="segment":      per_segment has data
    - granularity="conversation": overall has data
    """
    granularity: Literal["utterance", "segment", "conversation"]
    overall: Optional[dict[str, MetricScore]]      # Conversation-level scores
    per_utterance: Optional[list[UtteranceScore]]  # Per-utterance scores
    per_segment: Optional[list[SegmentScore]]      # Per-segment scores
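
# A minimal sketch (hypothetical helper, not in the original module) enforcing
# the invariant stated in the EvaluationResult docstring: only the field that
# matches `granularity` may be populated. This is a runtime check; TypedDicts
# are plain dicts, so .get() with a variable key works here.
def validate_result(result: EvaluationResult) -> None:
    """Raise ValueError unless exactly the expected field is populated."""
    expected = {
        "utterance": "per_utterance",
        "segment": "per_segment",
        "conversation": "overall",
    }[result["granularity"]]
    for field in ("overall", "per_utterance", "per_segment"):
        populated = result.get(field) is not None
        if populated != (field == expected):
            raise ValueError(
                f"granularity={result['granularity']!r} expects only "
                f"{expected!r} to be populated, but {field!r} is "
                f"{'set' if populated else 'missing'}"
            )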