Spaces:

AmiKim
/

KoreanSentimentAnalysis

Runtime error

App Files Files Community

AmiKim committed on Apr 28, 2025

Commit

6b16f50

verified ·

1 Parent(s): bc1ee0f

error fix

Browse files

Files changed (1) hide show

app.py +47 -28

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as F
-import re
-from collections import defaultdict, Counter
-# 모델 불러오기
 model_name = "hun3359/mdistilbertV3.1-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
@@ -15,20 +16,32 @@ if hasattr(model.config, "id2label"):
 else:
     labels = ['기쁨', '분노', '불안', '슬픔', '중립']
-# 카카오톡 대화 파싱
-def parse_kakao_chat(text):
-    chat_by_user = defaultdict(list)
-    message_pattern = re.compile(r'^\[(.+?)\] \[.+?\] (.+)$')
     for line in text.splitlines():
         line = line.strip()
-        match = message_pattern.match(line)
-        if match:
-            user, message = match.groups()
-            chat_by_user[user].append(message)
-    return chat_by_user
-# 감정 분석 함수
 def analyze_emotions(messages):
     emotions = []
     for msg in messages:
@@ -40,32 +53,38 @@ def analyze_emotions(messages):
             emotions.append(labels[pred])
     return emotions
-# 전체 분석 파이프라인
 def process_kakao_file(file):
-    if hasattr(file, "read"):  # 로컬 실행 또는 일반 파일 객체인 경우
         content = file.read().decode("utf-8")
-    else:  # Hugging Face Spaces에서는 NamedString(str) 객체
         content = file.decode("utf-8") if isinstance(file, bytes) else file
-    chat_by_user = parse_kakao_chat(content)
-    result_text = ""
-    for user, messages in chat_by_user.items():
-        emotions = analyze_emotions(messages)
         counts = Counter(emotions)
-        result_text += f"👤 {user} ({len(messages)}개 메시지)\n"
         for emotion, count in counts.items():
             result_text += f"  - {emotion}: {count}회\n"
         result_text += "\n"
-    return result_text if result_text else "유효한 메시지가 없습니다."
-# Gradio UI 구성
 gr.Interface(
     fn=process_kakao_file,
-    inputs=gr.File(label="카카오톡 대화 txt 파일 업로드 (.txt)"),
     outputs="text",
     title="카카오톡 감정 분석기",
-    description="카카오톡 대화 파일을 업로드하면 참여자별 감정 분포를 분석해줍니다. 😊"
-).launch()

 import gradio as gr
+import pandas as pd
+import re
+from collections import Counter
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as F
+# ✅ 감정 분석 모델 준비
 model_name = "hun3359/mdistilbertV3.1-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 else:
     labels = ['기쁨', '분노', '불안', '슬픔', '중립']
# ✅ KakaoTalk chat-export parsing
# Day-separator line: a run of dashes, the date with weekday, another run of dashes.
_KAKAO_DATE_PATTERN = re.compile(r"-{7,} (\d{4}년 \d{1,2}월 \d{1,2}일 .요일) -{7,}")
# Message line: "[sender] [time] message text".
_KAKAO_MSG_PATTERN = re.compile(r"\[(.*?)\] \[(.*?)\] (.+)")
# Column order of the returned frame; passed explicitly so an empty result
# still exposes the same columns as a populated one.
_KAKAO_COLUMNS = ["날짜", "보낸사람", "시간", "메시지"]


def kakao_text_parser(text):
    """Parse KakaoTalk export text into one DataFrame row per message.

    Each line is stripped and then matched from its start against the
    day-separator pattern and, failing that, the message pattern. A separator
    line updates the date applied to the messages that follow it.

    Args:
        text: Full chat export as a single string.

    Returns:
        A ``pandas.DataFrame`` with the columns ``날짜`` (date taken from the
        most recent separator), ``보낸사람`` (sender), ``시간`` (time) and
        ``메시지`` (message). The columns are present even when no message
        was parsed, in which case the frame is empty.

    Notes:
        Message lines that appear before the first day separator are skipped,
        and so are lines matching neither pattern (for example the
        continuation lines of a multi-line message).
    """
    rows = []
    current_date = None
    for raw_line in text.splitlines():
        line = raw_line.strip()

        date_match = _KAKAO_DATE_PATTERN.match(line)
        if date_match:
            current_date = date_match.group(1)
            continue

        msg_match = _KAKAO_MSG_PATTERN.match(line)
        # Only keep messages that can be attributed to a date.
        if msg_match and current_date:
            sender, sent_at, message = msg_match.groups()
            rows.append({
                "날짜": current_date,
                "보낸사람": sender,
                "시간": sent_at,
                "메시지": message,
            })
    return pd.DataFrame(rows, columns=_KAKAO_COLUMNS)
+# ✅ 감정 분석 함수
 def analyze_emotions(messages):
     emotions = []
     for msg in messages:
             emotions.append(labels[pred])
     return emotions
# ✅ End-to-end analysis: uploaded file -> per-sender emotion summary
def process_kakao_file(file):
    """Build a per-sender emotion summary for an uploaded KakaoTalk export.

    Args:
        file: The upload as handed over by the UI layer. Accepted forms:
            ``None`` (nothing uploaded), an object with ``read()``, a ``str``
            naming an existing file (its contents are read), raw ``bytes``,
            or a ``str`` that already holds the chat text.

    Returns:
        A text report with one section per sender (message count followed by
        the count of each emotion label), or an error message when the input
        is missing or contains no parsable message.
    """
    import os  # local import: only needed to recognise uploaded file paths

    no_messages = "❌ 유효한 메시지를 찾을 수 없습니다."
    if file is None:
        return no_messages

    if hasattr(file, "read"):
        content = file.read()
    elif isinstance(file, str) and os.path.isfile(file):
        # A str upload may be a path to the stored file rather than its
        # contents; parsing the path itself would never find a message.
        with open(file, "rb") as handle:
            content = handle.read()
    else:
        content = file
    if isinstance(content, bytes):
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
        content = content.decode("utf-8-sig")

    df = kakao_text_parser(content)
    if df.empty:
        return no_messages

    # NOTE(review): assumes analyze_emotions returns exactly one label per
    # message, in order — confirm against its full body.
    df["감정"] = analyze_emotions(df["메시지"])

    # Per-sender emotion summary.
    parts = []
    for user, emotions in df.groupby("보낸사람")["감정"]:
        parts.append(f"👤 {user} ({len(emotions)}개 메시지)\n")
        parts.extend(
            f"  - {emotion}: {count}회\n"
            for emotion, count in Counter(emotions).items()
        )
        parts.append("\n")
    return "".join(parts)
# ✅ Gradio app: a single file-upload input wired to process_kakao_file,
# with the returned report shown as plain text.
demo = gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 파일 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 내용을 업로드하면, 참여자별 감정 분포를 분석해드립니다 😊",
)
demo.launch(share=True)