| # YJ/pipeline/preprocess.py | |
| from __future__ import annotations | |
| from typing import Dict, Any, List | |
| from .models import UtteranceAnalysisRequest, UserProfile, DialogueTurn | |
def preprocess_dialogue(
    request: UtteranceAnalysisRequest,
    profile: UserProfile,
) -> Dict[str, Any]:
    """Normalize the raw dialogue log and pick the turns to analyze.

    Each entry of ``request.dialogue_log`` is turned into a ``DialogueTurn``
    whose ``turn_index`` is the entry's position in the log. Any speaker
    other than ``"caregiver"`` or ``"child"`` (including a missing one) is
    recorded as ``"other"``; a missing utterance becomes ``""``.

    Args:
        request: The analysis request; only ``dialogue_log`` is read here.
            Entries are assumed to be dict-like (they must support
            ``.get``) -- confirm against the request model.
        profile: The user profile; passed through to the result unchanged.

    Returns:
        A dict with the keys ``"request"``, ``"profile"``,
        ``"normalized_dialogue"`` (the list of ``DialogueTurn``) and
        ``"analysis_focus"`` (target speaker plus target turn indices).
    """
    # A sequence (not a set) so membership is decided by ``==`` and never
    # requires the speaker value to be hashable.
    known_speakers = ("caregiver", "child")

    turns: List[DialogueTurn] = []
    for position, raw_turn in enumerate(request.dialogue_log):
        role = raw_turn.get("speaker", "other")
        turns.append(
            DialogueTurn(
                speaker=role if role in known_speakers else "other",
                utterance=raw_turn.get("utterance", ""),
                turn_index=position,
            )
        )

    # Read the indices back from the constructed turns rather than from the
    # raw log, so whatever DialogueTurn stores is what gets reported.
    child_indices = []
    for entry in turns:
        if entry.speaker == "child":
            child_indices.append(entry.turn_index)

    # No child turn at all: fall back to the last turn of the dialogue.
    # An empty dialogue keeps an empty target list.
    if not child_indices and turns:
        child_indices = [len(turns) - 1]

    return {
        "request": request,
        "profile": profile,
        "normalized_dialogue": turns,
        "analysis_focus": {
            "target_speaker": "child",
            "target_turn_indices": child_indices,
        },
    }