Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- diarization.py +25 -1
diarization.py
CHANGED
|
@@ -56,6 +56,29 @@ def _segment_to_payload(start_sec: float, end_sec: float, speaker: str, sample_r
|
|
| 56 |
}
|
| 57 |
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int) -> list[dict[str, Any]]:
|
| 60 |
if not config.diarization_enabled:
|
| 61 |
return []
|
|
@@ -68,7 +91,8 @@ def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int)
|
|
| 68 |
if config.diarization_max_speakers > 0:
|
| 69 |
kwargs["max_speakers"] = config.diarization_max_speakers
|
| 70 |
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
diarization_segments: list[dict[str, Any]] = []
|
| 74 |
for turn, _, speaker in annotation.itertracks(yield_label=True):
|
|
|
|
| 56 |
}
|
| 57 |
|
| 58 |
|
| 59 |
+
def _resolve_annotation(diarization_output: Any) -> Any:
|
| 60 |
+
"""Return an object exposing itertracks(yield_label=True)."""
|
| 61 |
+
if hasattr(diarization_output, "itertracks"):
|
| 62 |
+
return diarization_output
|
| 63 |
+
|
| 64 |
+
# Newer pyannote pipelines may return wrappers like DiarizeOutput.
|
| 65 |
+
for attr in ("speaker_diarization", "annotation", "diarization"):
|
| 66 |
+
candidate = getattr(diarization_output, attr, None)
|
| 67 |
+
if candidate is not None and hasattr(candidate, "itertracks"):
|
| 68 |
+
return candidate
|
| 69 |
+
|
| 70 |
+
if isinstance(diarization_output, dict):
|
| 71 |
+
for key in ("speaker_diarization", "annotation", "diarization"):
|
| 72 |
+
candidate = diarization_output.get(key)
|
| 73 |
+
if candidate is not None and hasattr(candidate, "itertracks"):
|
| 74 |
+
return candidate
|
| 75 |
+
|
| 76 |
+
raise RuntimeError(
|
| 77 |
+
"Unsupported diarization output type "
|
| 78 |
+
f"{type(diarization_output).__name__}; expected Annotation-compatible object."
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
def run_diarization(wav_path: str, config: VoiceRuntimeConfig, sample_rate: int) -> list[dict[str, Any]]:
|
| 83 |
if not config.diarization_enabled:
|
| 84 |
return []
|
|
|
|
| 91 |
if config.diarization_max_speakers > 0:
|
| 92 |
kwargs["max_speakers"] = config.diarization_max_speakers
|
| 93 |
|
| 94 |
+
diarization_output = pipeline(wav_path, **kwargs) if kwargs else pipeline(wav_path)
|
| 95 |
+
annotation = _resolve_annotation(diarization_output)
|
| 96 |
|
| 97 |
diarization_segments: list[dict[str, Any]] = []
|
| 98 |
for turn, _, speaker in annotation.itertracks(yield_label=True):
|