# YJ/pipeline/preprocess.py
from __future__ import annotations
from typing import Dict, Any, List
from .models import UtteranceAnalysisRequest, UserProfile, DialogueTurn
def preprocess_dialogue(
    request: UtteranceAnalysisRequest,
    profile: UserProfile,
) -> Dict[str, Any]:
    """Normalize the raw ``dialogue_log`` into a list of ``DialogueTurn``.

    Each raw turn is read with ``.get``: a missing ``speaker`` or one that
    is not ``"caregiver"``/``"child"`` becomes ``"other"``, and a missing
    ``utterance`` becomes ``""``. ``turn_index`` is the turn's position in
    the raw log.

    The result also carries the indices of the turns to analyse: every
    ``"child"`` turn, or, when there is none, the last turn of the
    dialogue (an empty list when the dialogue itself is empty).

    Args:
        request: Analysis request whose ``dialogue_log`` holds the raw
            turns. A ``dialogue_log`` of ``None`` is treated as empty.
        profile: User profile, passed through unchanged.

    Returns:
        A dict with the keys ``"request"``, ``"profile"``,
        ``"normalized_dialogue"`` and ``"analysis_focus"``.
    """
    # A missing log is handled like an empty one instead of crashing in
    # enumerate(); the empty case is already supported further down.
    raw_turns = request.dialogue_log
    if raw_turns is None:
        raw_turns = []

    # Kept as a tuple (not a set) so an unhashable speaker value still
    # falls through to "other" rather than raising.
    known_speakers = ("caregiver", "child")

    normalized: List[DialogueTurn] = []
    for idx, turn in enumerate(raw_turns):
        speaker = turn.get("speaker", "other")
        if speaker not in known_speakers:
            speaker = "other"
        normalized.append(
            DialogueTurn(
                speaker=speaker,
                utterance=turn.get("utterance", ""),
                turn_index=idx,
            )
        )

    target_indices = [t.turn_index for t in normalized if t.speaker == "child"]
    if not target_indices and normalized:
        # No child turn at all: fall back to the final turn.
        target_indices = [len(normalized) - 1]

    return {
        "request": request,
        "profile": profile,
        "normalized_dialogue": normalized,
        "analysis_focus": {
            "target_speaker": "child",
            "target_turn_indices": target_indices,
        },
    }