# YJ/pipeline/preprocess.py
from __future__ import annotations

from typing import Dict, Any, List

from .models import UtteranceAnalysisRequest, UserProfile, DialogueTurn


def preprocess_dialogue(
    request: UtteranceAnalysisRequest,
    profile: UserProfile,
) -> Dict[str, Any]:
    """Normalize the raw dialogue log and pick the turns to analyze.

    Each entry of ``request.dialogue_log`` is converted into a
    ``DialogueTurn`` whose ``turn_index`` is the entry's position in the log.
    Speaker labels other than ``"caregiver"`` and ``"child"`` (including a
    missing label) are replaced by ``"other"``; a missing utterance becomes
    the empty string.

    Args:
        request: Analysis request; only its ``dialogue_log`` is read here.
            Entries are accessed with ``.get``, so they are presumably
            dict-like -- confirm against the model definition.
        profile: User profile, passed through to the result untouched.

    Returns:
        A dict with the keys ``"request"``, ``"profile"``,
        ``"normalized_dialogue"`` (the list of ``DialogueTurn``) and
        ``"analysis_focus"``. The latter holds ``"target_speaker"`` (always
        ``"child"``) and ``"target_turn_indices"``: the indices of all child
        turns, or the index of the last turn when there is no child turn,
        or an empty list when the log itself is empty.
    """
    recognized = ("caregiver", "child")

    def _canonical_speaker(raw_turn):
        # Collapse any unrecognized or missing speaker label into "other".
        label = raw_turn.get("speaker", "other")
        return label if label in recognized else "other"

    turns: List[DialogueTurn] = [
        DialogueTurn(
            speaker=_canonical_speaker(raw_turn),
            utterance=raw_turn.get("utterance", ""),
            turn_index=position,
        )
        for position, raw_turn in enumerate(request.dialogue_log)
    ]

    child_indices = [entry.turn_index for entry in turns if entry.speaker == "child"]
    if not child_indices and turns:
        # No child utterance at all: fall back to the final turn of the log.
        child_indices = [len(turns) - 1]

    return {
        "request": request,
        "profile": profile,
        "normalized_dialogue": turns,
        "analysis_focus": {
            "target_speaker": "child",
            "target_turn_indices": child_indices,
        },
    }