AmiKim committed on
Commit
5be445d
·
verified ·
1 Parent(s): 18b9ede

knu기반

Browse files
Files changed (1) hide show
  1. app.py +38 -117
app.py CHANGED
@@ -1,123 +1,44 @@
1
- import gradio as gr
2
  import pandas as pd
3
- import re
4
- from collections import Counter
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
- import torch
7
- import torch.nn.functional as F
8
- from datetime import datetime
9
-
10
# Load the pretrained Korean sentiment-classification model and its tokenizer.
model_name = "hun3359/mdistilbertV3.1-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Label names come from the model config when available (sorted by class id
# so list position matches the logit index); otherwise fall back to a fixed
# Korean label set (joy, anger, anxiety, sadness, neutral).
if hasattr(model.config, "id2label"):
    labels = [label for _, label in sorted(model.config.id2label.items())]
else:
    labels = ['기쁨', '분노', '불안', '슬픔', '중립']
19
-
20
# Emotion classification over a batch of chat messages.
def analyze_emotions(messages):
    """Return one predicted emotion label per input message.

    Parameters
    ----------
    messages : iterable of str
        Chat messages to classify.

    Returns
    -------
    list[str]
        Labels taken from the module-level ``labels`` list, in the same
        order as ``messages``.
    """
    texts = list(messages)
    # Empty input: avoid calling the tokenizer with an empty batch.
    if not texts:
        return []
    # Tokenize and run the whole batch in one forward pass instead of one
    # model invocation per message -- same predictions (padding is masked),
    # far less overhead.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    return [labels[p] for p in preds.tolist()]
31
-
32
# Merge consecutive messages from the same sender sent within one minute.
def merge_similar_messages(df):
    """Collapse rapid-fire messages into single rows.

    Consecutive rows with the same sender and the same date whose
    timestamps fall within one minute of the first message of the run
    are concatenated into one message row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 날짜 (date), 보낸사람 (sender),
        시간 (time) and 메시지 (message) as produced by the parser.

    Returns
    -------
    pandas.DataFrame
        Merged rows without the internal helper column. The caller's
        DataFrame is left unmodified (the original implementation leaked
        a ``datetime`` column into the input frame).
    """
    if df.empty:
        return df

    def parse_time(row):
        # 날짜 looks like "2024년 10월 21일 월요일"; keep only the date part.
        date_str = " ".join(row["날짜"].split()[:3])
        # Map the Korean AM/PM markers so %p can parse them.
        time_str = row["시간"].replace("오전", "AM").replace("오후", "PM")
        return datetime.strptime(f"{date_str} {time_str}", "%Y년 %m월 %d일 %p %I:%M")

    # Work on a copy so the helper column never leaks into the caller's frame.
    df = df.copy()
    df["datetime"] = df.apply(parse_time, axis=1)

    merged = []
    current = df.iloc[0].copy()

    for i in range(1, len(df)):
        row = df.iloc[i]
        same_sender = current["보낸사람"] == row["보낸사람"]
        same_date = current["날짜"] == row["날짜"]
        # Minutes since the first message of the current run (the anchor is
        # deliberately not advanced while merging: a run lasts at most ~1 min).
        time_diff = (row["datetime"] - current["datetime"]).total_seconds() / 60

        if same_sender and same_date and time_diff <= 1:
            current["메시지"] += " " + row["메시지"]
        else:
            merged.append(current)
            current = row.copy()

    merged.append(current)
    return pd.DataFrame(merged).drop(columns=["datetime"])
62
-
63
# Parse a raw KakaoTalk chat export into a DataFrame of dated messages.
def kakao_text_parser(text):
    """Turn exported KakaoTalk chat text into a DataFrame.

    Recognizes date-separator lines such as
    "------- 2024년 10월 21일 월요일 -------" and message lines of the form
    "[sender] [time] message". Message lines seen before the first date
    separator are ignored, as are lines matching neither pattern.
    """
    date_re = re.compile(r"-{7,} (\d{4}년 \d{1,2}월 \d{1,2}일 .요일) -{7,}")
    message_re = re.compile(r"\[(.+?)\]\s+\[(.+?)\]\s+(.+)")

    records = []
    active_date = None

    for raw_line in text.splitlines():
        stripped = raw_line.strip()

        date_hit = date_re.match(stripped)
        if date_hit:
            active_date = date_hit.group(1)
            continue

        if active_date:
            msg_hit = message_re.match(stripped)
            if msg_hit:
                sender, sent_at, body = msg_hit.groups()
                records.append({
                    "날짜": active_date,
                    "보낸사람": sender,
                    "시간": sent_at,
                    "메시지": body
                })

    return pd.DataFrame(records)
88
 
89
# End-to-end pipeline: uploaded file -> parse -> merge -> classify -> summary.
def process_kakao_file(file):
    """Read an uploaded KakaoTalk export and summarize emotions per sender."""
    # Accept a file-like object, raw bytes, or an already-decoded string.
    if hasattr(file, "read"):
        content = file.read().decode("utf-8")
    elif isinstance(file, bytes):
        content = file.decode("utf-8")
    else:
        content = file

    df = kakao_text_parser(content)
    if df.empty:
        return "❌ 유효한 메시지를 찾을 수 없습니다."

    df = merge_similar_messages(df)
    df["감정"] = analyze_emotions(df["메시지"])

    # Per-sender emotion frequency summary, one section per participant.
    lines = []
    for user, emotions in df.groupby("보낸사람")["감정"]:
        lines.append(f"👤 {user} ({len(emotions)}개 메시지)")
        for emotion, count in Counter(emotions).items():
            lines.append(f" - {emotion}: {count}회")
        lines.append("")

    return "\n".join(lines).strip()
115
-
116
# Build the Gradio app and launch it with a public share link.
demo = gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 파일 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 내용업로드하면, 참여자별 감정 분포분석드립니다 😊",
)
demo.launch(share=True)
 
 
 
 
 
1
  import pandas as pd
2
+ from konlpy.tag import Okt
3
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Load the KNU sentiment lexicon: tab-separated rows of word, polarity
# score, and a description column. NOTE(review): assumes KnuSentiLex.txt
# is shipped next to app.py -- read_csv raises FileNotFoundError otherwise.
knu_lex = pd.read_csv('KnuSentiLex.txt', sep='\t', names=['word', 'score', 'desc'])

# Okt morphological analyzer used to split input text into tokens.
okt = Okt()
10
+
11
# Lexicon-based sentiment scoring for a single text.
def get_sentiment_score(text):
    """Score *text* against the KNU sentiment lexicon.

    The text is tokenized with the module-level ``okt`` analyzer; every
    token present in ``knu_lex`` contributes its polarity score to the
    total.

    Returns
    -------
    str
        Human-readable summary: total score, the matched words with their
        individual scores, and a positive/negative/neutral verdict.
    """
    # Build a word -> score index once per call instead of scanning the whole
    # lexicon DataFrame with a boolean mask for every token (O(tokens * lexicon)
    # -> O(lexicon + tokens)). Keeping the first entry for duplicated words
    # matches the previous ``matched.iloc[0]`` behavior.
    word_scores = knu_lex.drop_duplicates(subset='word').set_index('word')['score']

    tokens = okt.morphs(text)
    score = 0
    matched_words = []

    for token in tokens:
        if token in word_scores.index:
            token_score = int(word_scores[token])
            score += token_score
            matched_words.append(f"{token}({token_score})")

    # Verdict thresholds: |score| >= 2 counts as clearly positive/negative.
    if score >= 2:
        interpretation = "😊 긍정적인 문장입니다!"
    elif score <= -2:
        interpretation = "☹️ 부정적인 문장입니다."
    else:
        interpretation = "😐 중립적인 문장입니다."

    return f"▶ 감정 점수: {score}\n▶ 감정 단어: {', '.join(matched_words)}\n\n{interpretation}"
33
 
34
# Wire the scorer into a simple Gradio text-in / text-out UI.
app_config = dict(
    fn=get_sentiment_score,
    inputs=gr.Textbox(lines=3, placeholder="카카오톡 메시지를 입력하세요"),
    outputs="text",
    title="KNU성사전 기반 감정 분석기",
    description="카카오톡 구어체 문장입력하면 감정 점수계산니다. (사전 기반 분석)",
)
iface = gr.Interface(**app_config)

if __name__ == "__main__":
    iface.launch()