"""Data structures for pipeline stages.""" from dataclasses import dataclass, asdict, field from typing import Optional, List import json @dataclass class TalkerInfo: """Information about a single speaker.""" id: int label: str # e.g. "SPEAKER_00" gender: Optional[str] = None # "male" / "female" / "unknown" / "ambiguous" mean_f0_hz: Optional[float] = None # Fundamental frequency in Hz transcript: Optional[str] = None # Transcribed speech language: Optional[str] = None # Detected language code (e.g. "en", "da") wav_path: Optional[str] = None # Path to extracted source WAV is_toi: bool = False # Talker of interest flag toi_reason: Optional[str] = None # Reasoning for ToI selection direction_deg: Optional[float] = None # Direction of arrival in degrees energy: Optional[float] = None # Energy level selection_score: Optional[float] = None # Numerical score for ToI selection def to_dict(self): """Convert to dictionary, excluding None values for cleaner JSON.""" return {k: v for k, v in asdict(self).items() if v is not None} @dataclass class PipelineOutput: """Complete output from pipeline execution.""" input_file: str approach: str # "ica" / "ica_deeplearning" / "frankenstein" duration_seconds: float sample_rate: int n_speakers: int talker_of_interest: int # Source index (1-indexed) sources: List[TalkerInfo] = field(default_factory=list) # Performance metrics execution_time_seconds: Optional[float] = None separation_method: Optional[str] = None doa_method: Optional[str] = None gender_method: Optional[str] = None asr_model: Optional[str] = None # Optional: Processing chain details processing_notes: Optional[str] = None def to_dict(self): """Convert to dictionary for JSON serialization.""" return { "input_file": self.input_file, "approach": self.approach, "duration_seconds": round(self.duration_seconds, 2), "sample_rate": self.sample_rate, "n_speakers": self.n_speakers, "talker_of_interest": self.talker_of_interest, "execution_time_seconds": round(self.execution_time_seconds, 2) if self.execution_time_seconds else None, "processing_methods": { "separation": self.separation_method, "direction_of_arrival": self.doa_method, "gender_classification": self.gender_method, "asr_model": self.asr_model, }, "sources": [s.to_dict() for s in self.sources], "notes": self.processing_notes, } def to_json(self, indent: int = 2) -> str: """Convert to JSON string.""" return json.dumps(self.to_dict(), indent=indent) @classmethod def from_dict(cls, data: dict): """Reconstruct from dictionary.""" sources = [TalkerInfo(**s) for s in data.get("sources", [])] return cls( input_file=data["input_file"], approach=data["approach"], duration_seconds=data["duration_seconds"], sample_rate=data["sample_rate"], n_speakers=data["n_speakers"], talker_of_interest=data["talker_of_interest"], sources=sources, execution_time_seconds=data.get("execution_time_seconds"), separation_method=data.get("processing_methods", {}).get("separation"), doa_method=data.get("processing_methods", {}).get("direction_of_arrival"), gender_method=data.get("processing_methods", {}).get("gender_classification"), asr_model=data.get("processing_methods", {}).get("asr_model"), processing_notes=data.get("notes"), )