File size: 1,278 Bytes
caf53ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# YJ/pipeline/preprocess.py

from __future__ import annotations
from typing import Dict, Any, List

from .models import UtteranceAnalysisRequest, UserProfile, DialogueTurn


def preprocess_dialogue(
    request: UtteranceAnalysisRequest,
    profile: UserProfile,
) -> Dict[str, Any]:
    """
    Normalize the raw ``request.dialogue_log`` into a list of DialogueTurn
    objects and record which turns the analysis should focus on.

    Each raw turn is read with ``.get``: a missing ``"speaker"`` or any
    speaker other than ``"caregiver"`` / ``"child"`` is stored as
    ``"other"``, and a missing ``"utterance"`` defaults to ``""``. The
    ``turn_index`` of every normalized turn is its position in the log.

    The focus indices are the ``turn_index`` values of all ``"child"``
    turns. When the log has no child turn, the index of the last turn is
    used instead; when the log is empty, the list is empty.

    Returns a dict with the keys ``"request"`` and ``"profile"`` (the
    arguments, passed through as-is), ``"normalized_dialogue"`` (the list
    of DialogueTurn objects) and ``"analysis_focus"`` (a dict holding
    ``"target_speaker"`` and ``"target_turn_indices"``).
    """
    known_speakers = ("caregiver", "child")

    turns: List[DialogueTurn] = []
    for position, raw_turn in enumerate(request.dialogue_log):
        role = raw_turn.get("speaker", "other")
        turns.append(
            DialogueTurn(
                # Anything outside the two recognised roles collapses to "other".
                speaker=role if role in known_speakers else "other",
                utterance=raw_turn.get("utterance", ""),
                turn_index=position,
            )
        )

    child_positions = [item.turn_index for item in turns if item.speaker == "child"]
    if child_positions:
        focus_indices = child_positions
    elif turns:
        # No child utterance at all: fall back to the final turn of the log.
        focus_indices = [len(turns) - 1]
    else:
        focus_indices = []

    return {
        "request": request,
        "profile": profile,
        "normalized_dialogue": turns,
        "analysis_focus": {
            "target_speaker": "child",
            "target_turn_indices": focus_indices,
        },
    }