Spaces:

Hedrekao
/

audio-explorers-visualization

Sleeping

audio-explorers-visualization / pipeline_modules /data_structures.py

hedrekao

HF deploy: clean snapshot without local artifacts

a361db3 about 1 month ago

3.9 kB

	"""Data structures for pipeline stages."""

	from dataclasses import dataclass, asdict, field
	from typing import Optional, List
	import json


	@dataclass
	class TalkerInfo:
	    """Per-speaker record produced by the pipeline.

	    Only ``id`` and ``label`` are required; every other attribute defaults
	    to ``None`` (or ``False`` for ``is_toi``) until it is filled in.
	    """
	    id: int
	    # Speaker label, e.g. "SPEAKER_00"
	    label: str
	    # One of "male" / "female" / "unknown" / "ambiguous"
	    gender: Optional[str] = None
	    # Mean fundamental frequency, in Hz
	    mean_f0_hz: Optional[float] = None
	    # Transcribed speech for this talker
	    transcript: Optional[str] = None
	    # Detected language code (e.g. "en", "da")
	    language: Optional[str] = None
	    # Path to the extracted source WAV
	    wav_path: Optional[str] = None
	    # True when this talker is the talker of interest (ToI)
	    is_toi: bool = False
	    # Reasoning behind the ToI selection
	    toi_reason: Optional[str] = None
	    # Direction of arrival, in degrees
	    direction_deg: Optional[float] = None
	    # Energy level
	    energy: Optional[float] = None
	    # Numerical score used for ToI selection
	    selection_score: Optional[float] = None

	    def to_dict(self):
	        """Return the fields as a dict, dropping any whose value is None.

	        Falsy values that are not ``None`` (``False``, ``0``, ``""``) are
	        kept, so ``is_toi`` is always present in the result.
	        """
	        compact = {}
	        for name, value in asdict(self).items():
	            if value is None:
	                continue
	            compact[name] = value
	        return compact


	@dataclass
	class PipelineOutput:
	    """Complete output from pipeline execution.

	    Holds the run-level metadata, the per-talker results in ``sources`` and
	    optional bookkeeping about which methods produced them. ``to_dict`` and
	    ``from_dict`` convert to and from a JSON-friendly dictionary in which
	    the four method fields are grouped under ``"processing_methods"`` and
	    ``processing_notes`` is stored under ``"notes"``.
	    """
	    input_file: str
	    approach: str  # "ica" / "ica_deeplearning" / "frankenstein"
	    duration_seconds: float
	    sample_rate: int
	    n_speakers: int
	    talker_of_interest: int  # Source index (1-indexed)
	    sources: List[TalkerInfo] = field(default_factory=list)

	    # Performance metrics
	    execution_time_seconds: Optional[float] = None
	    separation_method: Optional[str] = None
	    doa_method: Optional[str] = None
	    gender_method: Optional[str] = None
	    asr_model: Optional[str] = None

	    # Optional: Processing chain details
	    processing_notes: Optional[str] = None

	    def to_dict(self) -> dict:
	        """Convert to a dictionary suitable for JSON serialization.

	        ``duration_seconds`` and ``execution_time_seconds`` are rounded to
	        two decimals, so a ``to_dict`` / ``from_dict`` round trip does not
	        restore their full precision.

	        Returns:
	            A dict with the run metadata, a nested ``"processing_methods"``
	            dict, the serialized ``sources`` and the ``"notes"`` entry.
	        """
	        # Test against None rather than truthiness: an execution time of
	        # 0.0 is a real measurement and must not be reported as missing.
	        elapsed = self.execution_time_seconds
	        if elapsed is not None:
	            elapsed = round(elapsed, 2)

	        return {
	            "input_file": self.input_file,
	            "approach": self.approach,
	            "duration_seconds": round(self.duration_seconds, 2),
	            "sample_rate": self.sample_rate,
	            "n_speakers": self.n_speakers,
	            "talker_of_interest": self.talker_of_interest,
	            "execution_time_seconds": elapsed,
	            "processing_methods": {
	                "separation": self.separation_method,
	                "direction_of_arrival": self.doa_method,
	                "gender_classification": self.gender_method,
	                "asr_model": self.asr_model,
	            },
	            "sources": [s.to_dict() for s in self.sources],
	            "notes": self.processing_notes,
	        }

	    def to_json(self, indent: int = 2) -> str:
	        """Serialize ``to_dict()`` to a JSON string.

	        Args:
	            indent: Indentation width passed to ``json.dumps``.
	        """
	        return json.dumps(self.to_dict(), indent=indent)

	    @classmethod
	    def from_dict(cls, data: dict) -> "PipelineOutput":
	        """Reconstruct an instance from a dictionary shaped like ``to_dict()``.

	        Args:
	            data: Mapping with the required keys ``input_file``,
	                ``approach``, ``duration_seconds``, ``sample_rate``,
	                ``n_speakers`` and ``talker_of_interest``. The keys
	                ``sources``, ``execution_time_seconds``,
	                ``processing_methods`` and ``notes`` are optional.

	        Raises:
	            KeyError: If a required key is missing from ``data``.
	            TypeError: If a ``sources`` entry has keys that ``TalkerInfo``
	                does not accept.
	        """
	        # `or` also covers keys that are present but explicitly null in
	        # the JSON, which a plain `.get(key, default)` would let through.
	        methods = data.get("processing_methods") or {}
	        sources = [TalkerInfo(**s) for s in data.get("sources") or []]
	        return cls(
	            input_file=data["input_file"],
	            approach=data["approach"],
	            duration_seconds=data["duration_seconds"],
	            sample_rate=data["sample_rate"],
	            n_speakers=data["n_speakers"],
	            talker_of_interest=data["talker_of_interest"],
	            sources=sources,
	            execution_time_seconds=data.get("execution_time_seconds"),
	            separation_method=methods.get("separation"),
	            doa_method=methods.get("direction_of_arrival"),
	            gender_method=methods.get("gender_classification"),
	            asr_model=methods.get("asr_model"),
	            processing_notes=data.get("notes"),
	        )