AmiKim committed on
Commit
5be445d
·
verified ·
1 Parent(s): 18b9ede

knu기반

Browse files
Files changed (1) hide show
  1. app.py +38 -117
app.py CHANGED
@@ -1,123 +1,44 @@
1
- import gradio as gr
2
  import pandas as pd
3
- import re
4
- from collections import Counter
5
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
- import torch
7
- import torch.nn.functional as F
8
- from datetime import datetime
9
-
10
# Load the pretrained Korean sentiment-classification model and its tokenizer.
model_name = "hun3359/mdistilbertV3.1-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Label names come from the model config when available (sorted by class id
# so list position matches the logit index); otherwise fall back to a fixed
# Korean label set (joy, anger, anxiety, sadness, neutral).
if hasattr(model.config, "id2label"):
    labels = [label for _, label in sorted(model.config.id2label.items())]
else:
    labels = ['기쁨', '분노', '불안', '슬픔', '중립']
19
-
20
# Emotion classification over a batch of chat messages.
def analyze_emotions(messages):
    """Return one predicted emotion label per input message.

    Parameters
    ----------
    messages : iterable of str
        Chat messages to classify.

    Returns
    -------
    list[str]
        Labels taken from the module-level ``labels`` list, in the same
        order as ``messages``.
    """
    texts = list(messages)
    # Empty input: avoid calling the tokenizer with an empty batch.
    if not texts:
        return []
    # Tokenize and run the whole batch in one forward pass instead of one
    # model invocation per message -- same predictions (padding is masked),
    # far less overhead.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    preds = torch.argmax(probs, dim=1)
    return [labels[p] for p in preds.tolist()]
31
-
32
# Merge consecutive messages from the same sender sent within one minute.
def merge_similar_messages(df):
    """Collapse rapid-fire messages into single rows.

    Consecutive rows with the same sender and the same date whose
    timestamps fall within one minute of the first message of the run
    are concatenated into one message row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 날짜 (date), 보낸사람 (sender),
        시간 (time) and 메시지 (message) as produced by the parser.

    Returns
    -------
    pandas.DataFrame
        Merged rows without the internal helper column. The caller's
        DataFrame is left unmodified (the original implementation leaked
        a ``datetime`` column into the input frame).
    """
    if df.empty:
        return df

    def parse_time(row):
        # 날짜 looks like "2024년 10월 21일 월요일"; keep only the date part.
        date_str = " ".join(row["날짜"].split()[:3])
        # Map the Korean AM/PM markers so %p can parse them.
        time_str = row["시간"].replace("오전", "AM").replace("오후", "PM")
        return datetime.strptime(f"{date_str} {time_str}", "%Y년 %m월 %d일 %p %I:%M")

    # Work on a copy so the helper column never leaks into the caller's frame.
    df = df.copy()
    df["datetime"] = df.apply(parse_time, axis=1)

    merged = []
    current = df.iloc[0].copy()

    for i in range(1, len(df)):
        row = df.iloc[i]
        same_sender = current["보낸사람"] == row["보낸사람"]
        same_date = current["날짜"] == row["날짜"]
        # Minutes since the first message of the current run (the anchor is
        # deliberately not advanced while merging: a run lasts at most ~1 min).
        time_diff = (row["datetime"] - current["datetime"]).total_seconds() / 60

        if same_sender and same_date and time_diff <= 1:
            current["메시지"] += " " + row["메시지"]
        else:
            merged.append(current)
            current = row.copy()

    merged.append(current)
    return pd.DataFrame(merged).drop(columns=["datetime"])
62
-
63
# Parse a raw KakaoTalk chat export into a DataFrame of dated messages.
def kakao_text_parser(text):
    """Turn exported KakaoTalk chat text into a DataFrame.

    Recognizes date-separator lines such as
    "------- 2024년 10월 21일 월요일 -------" and message lines of the form
    "[sender] [time] message". Message lines seen before the first date
    separator are ignored, as are lines matching neither pattern.
    """
    date_re = re.compile(r"-{7,} (\d{4}년 \d{1,2}월 \d{1,2}일 .요일) -{7,}")
    message_re = re.compile(r"\[(.+?)\]\s+\[(.+?)\]\s+(.+)")

    records = []
    active_date = None

    for raw_line in text.splitlines():
        stripped = raw_line.strip()

        date_hit = date_re.match(stripped)
        if date_hit:
            active_date = date_hit.group(1)
            continue

        if active_date:
            msg_hit = message_re.match(stripped)
            if msg_hit:
                sender, sent_at, body = msg_hit.groups()
                records.append({
                    "날짜": active_date,
                    "보낸사람": sender,
                    "시간": sent_at,
                    "메시지": body
                })

    return pd.DataFrame(records)
88
 
89
# End-to-end pipeline: uploaded file -> parse -> merge -> classify -> summary.
def process_kakao_file(file):
    """Read an uploaded KakaoTalk export and summarize emotions per sender."""
    # Accept a file-like object, raw bytes, or an already-decoded string.
    if hasattr(file, "read"):
        content = file.read().decode("utf-8")
    elif isinstance(file, bytes):
        content = file.decode("utf-8")
    else:
        content = file

    df = kakao_text_parser(content)
    if df.empty:
        return "❌ 유효한 메시지를 찾을 수 없습니다."

    df = merge_similar_messages(df)
    df["감정"] = analyze_emotions(df["메시지"])

    # Per-sender emotion frequency summary, one section per participant.
    lines = []
    for user, emotions in df.groupby("보낸사람")["감정"]:
        lines.append(f"👤 {user} ({len(emotions)}개 메시지)")
        for emotion, count in Counter(emotions).items():
            lines.append(f" - {emotion}: {count}회")
        lines.append("")

    return "\n".join(lines).strip()
115
-
116
# Build the Gradio app and launch it with a public share link.
demo = gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 파일 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 내용업로드하면, 참여자별 감정 분포분석드립니다 😊",
)
demo.launch(share=True)
 
 
 
 
 
1
  import pandas as pd
2
+ from konlpy.tag import Okt
3
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Load the KNU sentiment lexicon: tab-separated rows of word, polarity
# score, and a description column. NOTE(review): assumes KnuSentiLex.txt
# is shipped next to app.py -- read_csv raises FileNotFoundError otherwise.
knu_lex = pd.read_csv('KnuSentiLex.txt', sep='\t', names=['word', 'score', 'desc'])

# Okt morphological analyzer used to split input text into tokens.
okt = Okt()
10
+
11
# Lexicon-based sentiment scoring for a single text.
def get_sentiment_score(text):
    """Score *text* against the KNU sentiment lexicon.

    The text is tokenized with the module-level ``okt`` analyzer; every
    token present in ``knu_lex`` contributes its polarity score to the
    total.

    Returns
    -------
    str
        Human-readable summary: total score, the matched words with their
        individual scores, and a positive/negative/neutral verdict.
    """
    # Build a word -> score index once per call instead of scanning the whole
    # lexicon DataFrame with a boolean mask for every token (O(tokens * lexicon)
    # -> O(lexicon + tokens)). Keeping the first entry for duplicated words
    # matches the previous ``matched.iloc[0]`` behavior.
    word_scores = knu_lex.drop_duplicates(subset='word').set_index('word')['score']

    tokens = okt.morphs(text)
    score = 0
    matched_words = []

    for token in tokens:
        if token in word_scores.index:
            token_score = int(word_scores[token])
            score += token_score
            matched_words.append(f"{token}({token_score})")

    # Verdict thresholds: |score| >= 2 counts as clearly positive/negative.
    if score >= 2:
        interpretation = "😊 긍정적인 문장입니다!"
    elif score <= -2:
        interpretation = "☹️ 부정적인 문장입니다."
    else:
        interpretation = "😐 중립적인 문장입니다."

    return f"▶ 감정 점수: {score}\n▶ 감정 단어: {', '.join(matched_words)}\n\n{interpretation}"
33
 
34
# Wire the scorer into a simple Gradio text-in / text-out UI.
app_config = dict(
    fn=get_sentiment_score,
    inputs=gr.Textbox(lines=3, placeholder="카카오톡 메시지를 입력하세요"),
    outputs="text",
    title="KNU성사전 기반 감정 분석기",
    description="카카오톡 구어체 문장입력하면 감정 점수계산니다. (사전 기반 분석)",
)
iface = gr.Interface(**app_config)

if __name__ == "__main__":
    iface.launch()