github-actions[bot]
Deploy from GitHub Actions (commit: 8b247ffacd77c0672965b8378f1d52a7dcd187ae)
9366995
| from typing import TypedDict, List, Optional, Literal, Union | |
class Utterance(TypedDict):
    """One utterance: a speaker label paired with the text of the utterance."""

    speaker: str
    text: str
| # Score types (mutually exclusive) | |
class CategoricalScore(TypedDict):
    """Label-only metric result, tagged with ``type == "categorical"``."""

    # Fixed tag identifying this dict as the categorical score shape.
    type: Literal["categorical"]
    # Category assigned by the metric, e.g. "High", "Change", "Positive".
    label: str
    # Confidence in the 0-1 range if available, otherwise None. The key itself
    # is still required: Optional[...] only widens the value type.
    confidence: Optional[float]
class NumericalScore(TypedDict):
    """Numeric metric result, tagged with ``type == "numerical"``.

    Carries the score together with the maximum of its scale.
    """

    # Fixed tag identifying this dict as the numerical score shape.
    type: Literal["numerical"]
    # The score itself, e.g. 3.0, 0.85, 8.5.
    value: float
    # Top of the scale the score is reported on, e.g. 5.0, 1.0, 10.0.
    max_value: float
    # Label derived from the value (e.g. "High" if value > threshold), or None.
    # The key itself is still required: Optional[...] only widens the value type.
    label: Optional[str]
# A metric score is one of the two score shapes; the "type" key
# ("categorical" or "numerical") tells which shape a given dict has.
MetricScore = Union[CategoricalScore, NumericalScore]
| # Evaluation result structures | |
class UtteranceScore(TypedDict):
    """Evaluation result for a single utterance."""

    # Index of the scored utterance in the original conversation.
    index: int
    # Metric name -> score, e.g. {"talk_type": {...}, "empathy_er": {...}}.
    metrics: dict[str, MetricScore]
class SegmentScore(TypedDict):
    """Evaluation result for a segment spanning several utterances."""

    # Indices of the utterances this segment covers. Uses the builtin generic
    # ``list[...]`` to match the ``dict[...]`` annotation below.
    utterance_indices: list[int]
    # Metric name -> aggregate score for the whole segment.
    metrics: dict[str, MetricScore]
class EvaluationResult(TypedDict):
    """Unified evaluation result format.

    ``granularity`` selects which one of the three result fields is populated:

    - ``"utterance"``: ``per_utterance`` has data
    - ``"segment"``: ``per_segment`` has data
    - ``"conversation"``: ``overall`` has data

    All three result keys are required by this TypedDict; the ones that do not
    match ``granularity`` are expected to hold ``None``.
    """

    granularity: Literal["utterance", "segment", "conversation"]
    # Conversation-level scores, keyed by metric name.
    overall: Optional[dict[str, MetricScore]]
    # Per-utterance scores. Builtin ``list[...]`` is used for consistency with
    # the builtin ``dict[...]`` generic above.
    per_utterance: Optional[list[UtteranceScore]]
    # Per-segment scores.
    per_segment: Optional[list[SegmentScore]]