unknownfriend00007 committed on
Commit
540cd4c
·
verified ·
1 Parent(s): 0d0b668

Upload 10 files

Browse files
Files changed (1) hide show
  1. diarization.py +25 -1
diarization.py CHANGED
@@ -56,6 +56,29 @@ def _segment_to_payload(start_sec: float, end_sec: float, speaker: str, sample_r
56
  }
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int) -> list[dict[str, Any]]:
60
  if not config.diarization_enabled:
61
  return []
@@ -68,7 +91,8 @@ def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int)
68
  if config.diarization_max_speakers > 0:
69
  kwargs["max_speakers"] = config.diarization_max_speakers
70
 
71
- annotation = pipeline(wav_path, **kwargs) if kwargs else pipeline(wav_path)
 
72
 
73
  diarization_segments: list[dict[str, Any]] = []
74
  for turn, _, speaker in annotation.itertracks(yield_label=True):
 
56
  }
57
 
58
 
59
def _resolve_annotation(diarization_output: Any) -> Any:
    """Return the object in *diarization_output* that exposes ``itertracks``.

    The diarization pipeline's return value is probed in this order:

    1. the value itself, when it already has an ``itertracks`` attribute;
    2. the attributes ``speaker_diarization``, ``annotation`` and
       ``diarization`` of a wrapper object;
    3. the same names used as keys when the value is a ``dict``;
    4. the elements of a ``tuple``/``list`` result, in order.

    Args:
        diarization_output: Whatever the diarization pipeline call returned.

    Returns:
        The first candidate found that has an ``itertracks`` attribute.

    Raises:
        RuntimeError: If no candidate exposes ``itertracks``.
    """
    # Single source of truth for the wrapper field names, shared by the
    # attribute lookup and the dict lookup below.
    field_names = ("speaker_diarization", "annotation", "diarization")

    def has_tracks(obj: Any) -> bool:
        # hasattr(None, "itertracks") is False, so missing fields are rejected.
        return hasattr(obj, "itertracks")

    if has_tracks(diarization_output):
        return diarization_output

    # Newer pyannote pipelines may return wrappers like DiarizeOutput.
    for name in field_names:
        candidate = getattr(diarization_output, name, None)
        if has_tracks(candidate):
            return candidate

    if isinstance(diarization_output, dict):
        for name in field_names:
            candidate = diarization_output.get(name)
            if has_tracks(candidate):
                return candidate
    elif isinstance(diarization_output, (tuple, list)):
        # Some pipeline configurations return the annotation alongside extra
        # data (e.g. an (annotation, embeddings) pair); take the first
        # element that looks like an annotation.
        for candidate in diarization_output:
            if has_tracks(candidate):
                return candidate

    raise RuntimeError(
        "Unsupported diarization output type "
        f"{type(diarization_output).__name__}; expected Annotation-compatible object."
    )
80
+
81
+
82
  def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int) -> list[dict[str, Any]]:
83
  if not config.diarization_enabled:
84
  return []
 
91
  if config.diarization_max_speakers > 0:
92
  kwargs["max_speakers"] = config.diarization_max_speakers
93
 
94
+ diarization_output = pipeline(wav_path, **kwargs) if kwargs else pipeline(wav_path)
95
+ annotation = _resolve_annotation(diarization_output)
96
 
97
  diarization_segments: list[dict[str, Any]] = []
98
  for turn, _, speaker in annotation.itertracks(yield_label=True):