Spaces:
Sleeping
Sleeping
File size: 3,349 Bytes
d445415 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# utils/emotion_utils.py
import re
import math
import numpy as np
from collections import defaultdict
# Full set of emotion labels. The string literals had been corrupted by an
# encoding round trip (UTF-8 bytes decoded with a Thai single-byte code page),
# which also split several literals across lines and made the module
# unparseable. They are restored to the original Korean text here; the order
# of the entries is unchanged.
LABELS = [
    '불평/불만', '환영/호의', '감동/감탄', '지긋지긋', '고마움', '슬픔',
    '화남/분노', '존경', '기대감', '우쭐댐/무시함', '안타까움/실망', '비장함',
    '의심/불신', '뿌듯함', '편안/쾌적', '신기함/관심', '아껴주는', '부끄러움',
    '공포/무서움', '절망', '한심함', '역겨움/징그러움', '짜증', '어이없음',
    '없음', '패배/자기혐오', '귀찮음', '힘듦/지침', '즐거움/신남', '깨달음',
    '죄책감', '증오/혐오', '흐뭇함(귀여움/예쁨)', '당황/난처', '경악',
    '부담/안_내킴', '서러움', '재미없음', '불쌍함/연민', '놀람', '행복',
    '불안/걱정', '기쁨', '안심/신뢰',
]
# Emotion labels that are treated as negative. The literals had been corrupted
# by an encoding round trip (UTF-8 bytes decoded with a Thai single-byte code
# page), which also split two of them across lines; they are restored to the
# original Korean text here, in the original order.
NEGATIVE_EMOTIONS = [
    '불평/불만', '지긋지긋', '슬픔', '화남/분노', '의심/불신', '공포/무서움', '절망', '한심함',
    '역겨움/징그러움', '짜증', '어이없음', '패배/자기혐오', '귀찮음', '힘듦/지침', '죄책감',
    '증오/혐오', '당황/난처', '부담/안_내킴', '서러움', '재미없음',
]
def parse_dialogue(text: str):
    """
    Parse a line-oriented dialogue written in "speaker:utterance" form.

    Blank lines are ignored, as are lines that lack a non-empty speaker
    before the first colon or non-empty text after it. Returns a list of
    (speaker, utterance) tuples with surrounding whitespace stripped.
    """
    parsed = []
    for raw in text.strip().split("\n"):
        entry = raw.strip()
        if not entry:
            continue
        # Only the first colon separates speaker from utterance, so the
        # utterance itself may contain further colons.
        speaker, colon, utterance = entry.partition(":")
        if colon and speaker and utterance:
            parsed.append((speaker.strip(), utterance.strip()))
    return parsed
def adjusted_score(raw_prob: float, k: float = 5.0) -> float:
    """
    Rescale a probability in [0, 1] to a 0-100 score that is easier to read
    intuitively (logistic stretching centred on 0.5).

    raw_prob: probability to rescale; 0.5 maps to exactly 50.0.
    k: steepness of the logistic curve; larger values push scores toward
       0 or 100 more quickly.

    Returns the score as a float in [0, 100].
    """
    try:
        return 100.0 / (1.0 + math.exp(-k * (raw_prob - 0.5)))
    except OverflowError:
        # math.exp raises once its argument is too large for a float, which
        # happens for a large k with raw_prob well below 0.5. The logistic
        # tends to 0 there, so return that limit instead of crashing.
        return 0.0
def apply_ema(series, alpha=0.4):
    """
    Smooth a sequence with an exponential moving average.

    series: indexable, sliceable sequence of numbers (list, tuple or NumPy
            array). None or an empty sequence yields [].
    alpha: weight given to each new sample; the previous smoothed value
           receives (1 - alpha).

    Returns a list of the same length whose first element is series[0].
    """
    # An explicit length check is used instead of truthiness because
    # `not array` raises ValueError for multi-element NumPy arrays.
    if series is None or len(series) == 0:
        return []
    smoothed = [series[0]]
    for sample in series[1:]:
        smoothed.append(alpha * sample + (1 - alpha) * smoothed[-1])
    return smoothed
def detect_emotion_spikes(emotion_series, z_thresh=1.8, min_len=5):
    """
    Detect sudden emotion spikes using z-scores.

    - Statistical background: standard score z = (x - mean) / std
    - Suggested z threshold: 1.8 to 2.5 (tune to the variability of the data)

    Series shorter than min_len return []. Only upward deviations
    (z >= z_thresh) are reported. Returns a list of
    (index, value, z rounded to 2 decimals) tuples.
    """
    if len(emotion_series) < min_len:
        return []
    mu = float(np.mean(emotion_series))
    # The small epsilon keeps the division defined for a constant series.
    sigma = float(np.std(emotion_series)) + 1e-6
    scored = (
        (pos, val, (val - mu) / sigma)
        for pos, val in enumerate(emotion_series)
    )
    return [(pos, val, round(z, 2)) for pos, val, z in scored if z >= z_thresh]
def infer_conflict_initiator(dialogue, spikes_by_speaker):
    """
    Simple rule-based estimate of who triggers emotional spikes:
    - If speaker B's emotion spikes at index idx and the entry just before
      idx in dialogue was spoken by A, A is credited with one trigger.
    - The speaker credited with the most triggers is returned as the
      estimated initiator; None is returned when nobody was credited.

    dialogue: sequence whose items have the speaker at position 0.
    spikes_by_speaker: mapping of speaker -> iterable of (idx, value, z).
    """
    trigger_counts = defaultdict(int)
    for speaker, spikes in spikes_by_speaker.items():
        for idx, _value, _z in spikes:
            # A spike on the very first entry has no preceding speaker.
            if idx == 0:
                continue
            previous = dialogue[idx - 1][0]
            if previous != speaker:
                trigger_counts[previous] += 1
    if not trigger_counts:
        return None
    # max() keeps the first-inserted speaker on ties.
    return max(trigger_counts, key=trigger_counts.get)