| """Data structures for pipeline stages.""" |
|
|
| from dataclasses import dataclass, asdict, field |
| from typing import Optional, List |
| import json |
|
|
|
|
@dataclass
class TalkerInfo:
    """Record describing one speaker.

    Only ``id`` and ``label`` are required. Every other field defaults to
    ``None`` (or ``False`` for ``is_toi``) and may be filled in later.
    """

    # Required identification fields.
    id: int
    label: str

    # Optional fields; any that remain None are omitted by to_dict().
    gender: Optional[str] = None
    mean_f0_hz: Optional[float] = None
    transcript: Optional[str] = None
    language: Optional[str] = None
    wav_path: Optional[str] = None
    is_toi: bool = False
    toi_reason: Optional[str] = None
    direction_deg: Optional[float] = None
    energy: Optional[float] = None
    selection_score: Optional[float] = None

    def to_dict(self):
        """Return the fields as a dict, leaving out entries whose value is None.

        Falsy values that are not ``None`` (``False``, ``0``, ``0.0``, ``""``)
        are kept; field declaration order is preserved.
        """
        serialized = {}
        for name, value in asdict(self).items():
            if value is None:
                # Unset optional field: drop it to keep the JSON compact.
                continue
            serialized[name] = value
        return serialized
|
|
|
|
@dataclass
class PipelineOutput:
    """Complete output from pipeline execution.

    Holds the required run-description fields, the per-speaker ``TalkerInfo``
    records in ``sources``, and optional metadata naming the processing
    methods that were used.
    """

    input_file: str
    approach: str
    duration_seconds: float
    sample_rate: int
    n_speakers: int
    talker_of_interest: int
    # Forward reference: the class body does not need TalkerInfo to be
    # resolvable at definition time, only when from_dict() builds sources.
    sources: List["TalkerInfo"] = field(default_factory=list)

    execution_time_seconds: Optional[float] = None
    separation_method: Optional[str] = None
    doa_method: Optional[str] = None
    gender_method: Optional[str] = None
    asr_model: Optional[str] = None

    processing_notes: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization.

        ``duration_seconds`` and ``execution_time_seconds`` are rounded to two
        decimal places, so a ``to_dict``/``from_dict`` round trip is lossy for
        those two fields. The four method fields are nested under
        ``"processing_methods"`` and ``processing_notes`` is emitted as
        ``"notes"``.

        Returns:
            A dict whose ``"sources"`` entry is the list of each source's
            own ``to_dict()`` result.
        """
        # Compare against None explicitly: a truthiness test would turn a
        # legitimate 0.0 execution time into None.
        if self.execution_time_seconds is not None:
            execution_time = round(self.execution_time_seconds, 2)
        else:
            execution_time = None

        return {
            "input_file": self.input_file,
            "approach": self.approach,
            "duration_seconds": round(self.duration_seconds, 2),
            "sample_rate": self.sample_rate,
            "n_speakers": self.n_speakers,
            "talker_of_interest": self.talker_of_interest,
            "execution_time_seconds": execution_time,
            "processing_methods": {
                "separation": self.separation_method,
                "direction_of_arrival": self.doa_method,
                "gender_classification": self.gender_method,
                "asr_model": self.asr_model,
            },
            "sources": [s.to_dict() for s in self.sources],
            "notes": self.processing_notes,
        }

    def to_json(self, indent: int = 2) -> str:
        """Convert to a JSON string.

        Args:
            indent: Indentation width passed through to ``json.dumps``.

        Returns:
            The JSON encoding of ``to_dict()``.
        """
        return json.dumps(self.to_dict(), indent=indent)

    @classmethod
    def from_dict(cls, data: dict) -> "PipelineOutput":
        """Reconstruct an instance from a dictionary shaped like ``to_dict()``.

        Args:
            data: Mapping with the required keys ``input_file``, ``approach``,
                ``duration_seconds``, ``sample_rate``, ``n_speakers`` and
                ``talker_of_interest``. The keys ``sources``,
                ``execution_time_seconds``, ``processing_methods`` and
                ``notes`` are optional; a missing or ``None`` value for
                ``sources`` or ``processing_methods`` is treated as empty.

        Returns:
            A new instance populated from ``data``.

        Raises:
            KeyError: If a required key is missing.
            TypeError: If a source entry has a key that is not a
                ``TalkerInfo`` field.
        """
        # ``or`` (rather than a .get default) also covers an explicit null,
        # which JSON input may carry for these optional sections.
        methods = data.get("processing_methods") or {}
        raw_sources = data.get("sources") or []
        sources = [TalkerInfo(**s) for s in raw_sources]
        return cls(
            input_file=data["input_file"],
            approach=data["approach"],
            duration_seconds=data["duration_seconds"],
            sample_rate=data["sample_rate"],
            n_speakers=data["n_speakers"],
            talker_of_interest=data["talker_of_interest"],
            sources=sources,
            execution_time_seconds=data.get("execution_time_seconds"),
            separation_method=methods.get("separation"),
            doa_method=methods.get("direction_of_arrival"),
            gender_method=methods.get("gender_classification"),
            asr_model=methods.get("asr_model"),
            processing_notes=data.get("notes"),
        )
|
|