Spaces:

leewatson
/

kshs33_emotion_predict

Sleeping

File size: 3,349 Bytes

d445415

# utils/emotion_utils.py
import re
import math
import numpy as np
from collections import defaultdict

# Emotion label vocabulary (44 entries).
# The order is preserved exactly as originally written; presumably it mirrors
# the classifier's output indices -- TODO confirm before reordering.
LABELS = [
    '불평/불만', '환영/호의', '감동/감탄', '지긋지긋',
    '고마움', '슬픔', '화남/분노', '존경',
    '기대감', '우쭐댐/무시함', '안타까움/실망', '비장함',
    '의심/불신', '뿌듯함', '편안/쾌적', '신기함/관심',
    '아껴주는', '부끄러움', '공포/무서움', '절망',
    '한심함', '역겨움/징그러움', '짜증', '어이없음',
    '없음', '패배/자기혐오', '귀찮음', '힘듦/지침',
    '즐거움/신남', '깨달음', '죄책감', '증오/혐오',
    '흐뭇함(귀여움/예쁨)', '당황/난처', '경악', '부담/안_내킴',
    '서러움', '재미없음', '불쌍함/연민', '놀람',
    '행복', '불안/걱정', '기쁨', '안심/신뢰',
]

# Labels that this module classifies as negative emotions (20 entries).
# Every entry is spelled exactly as the corresponding label string.
NEGATIVE_EMOTIONS = [
    '불평/불만', '지긋지긋', '슬픔', '화남/분노', '의심/불신',
    '공포/무서움', '절망', '한심함', '역겨움/징그러움', '짜증',
    '어이없음', '패배/자기혐오', '귀찮음', '힘듦/지침', '죄책감',
    '증오/혐오', '당황/난처', '부담/안_내킴', '서러움', '재미없음',
]

def parse_dialogue(text: str):
    """
    Parse a line-per-utterance dialogue written as "speaker:sentence".

    Each non-blank line is split at its first colon. A line is skipped when
    it has no colon, or when nothing precedes or follows that first colon.

    Returns a list of (speaker, sentence) tuples in input order, with
    surrounding whitespace removed from both parts.
    """
    pairs = []
    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        # Only the first colon separates speaker from sentence; any later
        # colons stay inside the sentence.
        speaker, colon, sentence = line.partition(":")
        if colon and speaker and sentence:
            pairs.append((speaker.strip(), sentence.strip()))
    return pairs

def adjusted_score(raw_prob: float, k: float = 5.0) -> float:
    """
    Stretch a probability into a more intuitive score via a logistic curve.

    Computes ``100 / (1 + exp(-k * (raw_prob - 0.5)))``. The curve is centred
    on 0.5 (which maps to exactly 50) and always stays inside (0, 100).
    With the default ``k`` the inputs 0.0 and 1.0 map to roughly 7.6 and
    92.4, so the full 0-100 range is only approached for larger ``k``.

    Args:
        raw_prob: Probability to rescale, nominally in [0, 1].
        k: Steepness of the logistic curve; larger values stretch harder.

    Returns:
        The rescaled score as a float.
    """
    exponent = -k * (raw_prob - 0.5)
    try:
        growth = math.exp(exponent)
    except OverflowError:
        # exp() only overflows for exponents above ~709, where the score is
        # indistinguishable from zero, so saturate instead of crashing.
        return 0.0
    return 100.0 / (1.0 + growth)

def apply_ema(series, alpha=0.4):
    """
    Smooth a numeric series with an exponential moving average (EMA).

    The first output equals the first input; every later output is
    ``alpha * current + (1 - alpha) * previous_output``.

    Args:
        series: Values to smooth. Any iterable is accepted (list, tuple,
            NumPy array, generator); ``None`` is treated as empty.
        alpha: Weight given to the newest sample. Higher values follow the
            input more closely, lower values smooth more.

    Returns:
        A new list with one smoothed value per input value, or ``[]`` when
        there is no input.
    """
    if series is None:
        return []
    # Materialise once: truth-testing a multi-element NumPy array raises
    # ValueError and generators cannot be indexed, so work on a plain list.
    values = list(series)
    if not values:
        return []
    smoothed = [values[0]]
    for value in values[1:]:
        smoothed.append(alpha * value + (1 - alpha) * smoothed[-1])
    return smoothed

def detect_emotion_spikes(emotion_series, z_thresh=1.8, min_len=5):
    """
    Detect sudden emotion surges using z-scores.

    - Statistical background: standard score z = (x - mean) / std, with
      mean and std taken over the whole series.
    - Suggested z threshold: 1.8 to 2.5 (tune to the data's variability).
    - Only upward deviations are reported (z >= z_thresh).

    Returns a list of (index, value, z rounded to 2 decimals) tuples in
    series order, or an empty list when the series has fewer than
    ``min_len`` points.
    """
    if len(emotion_series) < min_len:
        return []

    center = float(np.mean(emotion_series))
    # The small epsilon keeps the division defined for a constant series.
    spread = float(np.std(emotion_series)) + 1e-6

    scored = (
        (pos, value, (value - center) / spread)
        for pos, value in enumerate(emotion_series)
    )
    return [
        (pos, value, round(z, 2))
        for pos, value, z in scored
        if z >= z_thresh
    ]

def infer_conflict_initiator(dialogue, spikes_by_speaker):
    """
    Estimate who triggered the conflict with a simple rule-based heuristic.

    - For every spike recorded for speaker B at index ``idx``, look at the
      utterance just before it (``dialogue[idx - 1]``). If that utterance
      belongs to a different speaker A, A's trigger count goes up by one.
    - The speaker with the highest trigger count is returned. On a tie the
      speaker who was blamed first wins.

    Args:
        dialogue: Sequence whose items have the speaker at position 0,
            e.g. (speaker, sentence) tuples.
        spikes_by_speaker: Mapping of speaker -> list of
            (index, value, z) spike tuples. The index is used as a position
            in ``dialogue``.

    Returns:
        The estimated initiator, or ``None`` when no spike can be
        attributed to another speaker.
    """
    blame = {}
    turn_count = len(dialogue)
    for speaker, spikes in spikes_by_speaker.items():
        for idx, _value, _z in spikes:
            # idx == 0 has no preceding utterance. A negative idx would wrap
            # around to the wrong utterance and an idx past the dialogue
            # would raise IndexError, so both are skipped as well.
            if not 0 < idx <= turn_count:
                continue
            prev_speaker = dialogue[idx - 1][0]
            if prev_speaker != speaker:
                blame[prev_speaker] = blame.get(prev_speaker, 0) + 1
    if not blame:
        return None
    # max() keeps the first maximal key in insertion order, which matches a
    # stable descending sort by count.
    return max(blame, key=blame.get)