coconut / src /pipeline /preprocess.py
alohaboy
feat: Add LLM-based chat mode and integrate YJ pipeline
caf53ab
# YJ/pipeline/preprocess.py
from __future__ import annotations
from typing import Dict, Any, List
from .models import UtteranceAnalysisRequest, UserProfile, DialogueTurn
def preprocess_dialogue(
    request: UtteranceAnalysisRequest,
    profile: UserProfile,
) -> Dict[str, Any]:
    """Normalize the raw dialogue log and pick the turns to analyze.

    Each entry of ``request.dialogue_log`` is converted into a
    ``DialogueTurn`` whose ``turn_index`` is its position in the log.
    A speaker label other than ``"caregiver"`` or ``"child"`` (including a
    missing one) becomes ``"other"``; a missing utterance becomes ``""``.

    The analysis targets are the indices of all ``"child"`` turns. If there
    are none, the last turn of the dialogue is targeted instead; for an
    empty log the target list is empty.

    Args:
        request: Analysis request; only ``dialogue_log`` is read here.
        profile: User profile, passed through to the result unchanged.

    Returns:
        A dict with the keys ``"request"``, ``"profile"``,
        ``"normalized_dialogue"`` (list of ``DialogueTurn``) and
        ``"analysis_focus"`` (target speaker plus target turn indices).
    """
    recognized_roles = ("caregiver", "child")

    turns: List[DialogueTurn] = []
    for position, raw_turn in enumerate(request.dialogue_log):
        role = raw_turn.get("speaker", "other")
        turns.append(
            DialogueTurn(
                # Collapse every unrecognized label into the "other" bucket.
                speaker=role if role in recognized_roles else "other",
                utterance=raw_turn.get("utterance", ""),
                turn_index=position,
            )
        )

    # Gather the indices of the child's turns; these are what gets analyzed.
    child_positions = []
    for entry in turns:
        if entry.speaker == "child":
            child_positions.append(entry.turn_index)

    # No child turn at all: fall back to the final turn of the dialogue.
    if not child_positions and turns:
        child_positions = [len(turns) - 1]

    return {
        "request": request,
        "profile": profile,
        "normalized_dialogue": turns,
        "analysis_focus": {
            "target_speaker": "child",
            "target_turn_indices": child_positions,
        },
    }