""" Stage 5 — Speaker Profiler (Simplified — Male Voice Forced) Since we force male voice, this just groups speakers. """ import logging from pathlib import Path from typing import Dict, List from collections import defaultdict logger = logging.getLogger(__name__) def profile_speakers( segments: List[Dict], audio_path: Path, output_dir: Path ) -> Dict[str, Dict]: """Profile speakers — all forced to male.""" speaker_segments = defaultdict(list) for seg in segments: speaker_segments[seg.get("speaker", "SPEAKER_00")].append(seg) logger.info(f"Found {len(speaker_segments)} speakers — ALL forced to MALE voice") profiles = {} for speaker_id, segs in speaker_segments.items(): total_time = sum(s["end"] - s["start"] for s in segs) profiles[speaker_id] = { "gender": "male", # FORCED "reference_audio": None, "total_speaking_time": round(total_time, 2), "avg_pitch": 0, "segment_count": len(segs), } logger.info(f" {speaker_id}: MALE (forced), {len(segs)} segments, {total_time:.1f}s") return profiles