Spaces:
Runtime error
Runtime error
error fix
Browse files
app.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import torch
|
| 4 |
import torch.nn.functional as F
|
| 5 |
-
import re
|
| 6 |
-
from collections import defaultdict, Counter
|
| 7 |
|
| 8 |
-
# ๋ชจ๋ธ
|
| 9 |
model_name = "hun3359/mdistilbertV3.1-sentiment"
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 11 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
@@ -15,20 +16,32 @@ if hasattr(model.config, "id2label"):
|
|
| 15 |
else:
|
| 16 |
labels = ['๊ธฐ์จ', '๋ถ๋
ธ', '๋ถ์', '์ฌํ', '์ค๋ฆฝ']
|
| 17 |
|
| 18 |
-
# ์นด์นด์คํก
|
| 19 |
-
def
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
for line in text.splitlines():
|
| 23 |
line = line.strip()
|
| 24 |
-
|
| 25 |
-
if
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
# ๊ฐ์ ๋ถ์ ํจ์
|
| 32 |
def analyze_emotions(messages):
|
| 33 |
emotions = []
|
| 34 |
for msg in messages:
|
|
@@ -40,32 +53,38 @@ def analyze_emotions(messages):
|
|
| 40 |
emotions.append(labels[pred])
|
| 41 |
return emotions
|
| 42 |
|
| 43 |
-
# ์ ์ฒด ๋ถ์
|
| 44 |
def process_kakao_file(file):
|
| 45 |
-
|
|
|
|
| 46 |
content = file.read().decode("utf-8")
|
| 47 |
-
else:
|
| 48 |
content = file.decode("utf-8") if isinstance(file, bytes) else file
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
| 55 |
counts = Counter(emotions)
|
| 56 |
-
result_text += f"๐ค {user} ({len(
|
| 57 |
for emotion, count in counts.items():
|
| 58 |
result_text += f" - {emotion}: {count}ํ\n"
|
| 59 |
result_text += "\n"
|
| 60 |
|
| 61 |
-
return result_text
|
| 62 |
-
|
| 63 |
|
| 64 |
-
# Gradio
|
| 65 |
gr.Interface(
|
| 66 |
fn=process_kakao_file,
|
| 67 |
-
inputs=gr.File(label="์นด์นด์คํก ๋ํ
|
| 68 |
outputs="text",
|
| 69 |
title="์นด์นด์คํก ๊ฐ์ ๋ถ์๊ธฐ",
|
| 70 |
-
description="์นด์นด์คํก ๋ํ
|
| 71 |
-
).launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
+
from collections import Counter
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 6 |
import torch
|
| 7 |
import torch.nn.functional as F
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
# ✅ Sentiment-analysis model setup (감정 분석 모델 준비)
|
| 10 |
model_name = "hun3359/mdistilbertV3.1-sentiment"
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 12 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
| 16 |
else:
|
| 17 |
labels = ['๊ธฐ์จ', '๋ถ๋
ธ', '๋ถ์', '์ฌํ', '์ค๋ฆฝ']
|
| 18 |
|
| 19 |
+
# ✅ KakaoTalk export parsing function (카카오톡 파싱 함수)
|
| 20 |
+
def kakao_text_parser(text):
    """Parse a KakaoTalk chat export into a DataFrame, one row per message.

    After stripping surrounding whitespace, two line shapes are recognised:

    * date separators, e.g.
      ``--------------- 2024년 1월 2일 화요일 ---------------``
    * message lines, e.g. ``[sender] [time] message``

    Every other line is ignored. Message lines that appear before the first
    date separator are ignored as well, because they have no date to attach.

    Args:
        text: Full contents of the export as a single string.

    Returns:
        pandas.DataFrame with the columns "날짜" (date), "보낸사람" (sender),
        "시간" (time) and "메시지" (message). The columns are present even
        when no message was found, so callers can index them safely.
    """
    columns = ["날짜", "보낸사람", "시간", "메시지"]
    # The date separator is at least seven dashes on each side of
    # "<year>년 <month>월 <day>일 <weekday>요일".
    date_pattern = re.compile(
        r"-{7,} (\d{4}년 \d{1,2}월 \d{1,2}일 .요일) -{7,}"
    )
    # Non-greedy groups stop at the first "] " so that brackets inside the
    # message text stay part of the message.
    msg_pattern = re.compile(r"\[(.*?)\] \[(.*?)\] (.+)")

    rows = []
    current_date = None

    for raw_line in text.splitlines():
        line = raw_line.strip()

        date_match = date_pattern.match(line)
        if date_match:
            current_date = date_match.group(1)
            continue

        msg_match = msg_pattern.match(line)
        if msg_match and current_date:
            sender, sent_at, message = msg_match.groups()
            rows.append(
                dict(zip(columns, (current_date, sender, sent_at, message)))
            )

    # Passing `columns` keeps the schema stable for an empty export.
    return pd.DataFrame(rows, columns=columns)
|
| 43 |
|
| 44 |
+
# ✅ Emotion-analysis function (감정 분석 함수)
|
| 45 |
def analyze_emotions(messages):
|
| 46 |
emotions = []
|
| 47 |
for msg in messages:
|
|
|
|
| 53 |
emotions.append(labels[pred])
|
| 54 |
return emotions
|
| 55 |
|
| 56 |
+
# ✅ End-to-end analysis function (전체 분석 함수)
|
| 57 |
def _read_uploaded_text(file):
    """Return the text of an upload, whatever shape it arrives in.

    Handles ``None`` (nothing uploaded), raw ``bytes``, a path given as
    ``str``/``os.PathLike`` (including ``str`` subclasses), a readable
    file-like object, and an object that only exposes a ``name`` path.
    A ``str`` that does not name an existing file is treated as the chat
    text itself, which keeps the previous behaviour for plain strings.

    Raises:
        TypeError: if ``file`` is none of the supported shapes.
    """
    import os  # local import keeps this block self-contained

    if file is None:
        return ""
    if isinstance(file, bytes):
        return file.decode("utf-8")
    if isinstance(file, (str, os.PathLike)):
        # A path must be opened; using it as the content would make the
        # parser see only the file name and find no messages.
        if os.path.isfile(file):
            with open(file, encoding="utf-8") as handle:
                return handle.read()
        return str(file)
    if hasattr(file, "read"):
        data = file.read()
        # Binary handles return bytes, text handles return str.
        return data.decode("utf-8") if isinstance(data, bytes) else data
    path = getattr(file, "name", None)
    if path is not None and os.path.isfile(path):
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    raise TypeError(f"unsupported upload type: {type(file).__name__}")


def process_kakao_file(file):
    """Summarise the emotions of each participant in a KakaoTalk export.

    Args:
        file: The uploaded export. Accepts a file path, raw bytes, a
            readable file-like object, plain chat text, or ``None``.

    Returns:
        A text report with one section per sender: a header line with the
        sender and message count, followed by one line per detected
        emotion and its count. When the upload is empty or contains no
        parsable message, a single "no valid messages" line is returned.
    """
    # NOTE(review): the leading symbol of this message was lost in
    # extraction; a cross mark is assumed -- confirm against the repo.
    no_messages = "❌ 유효한 메시지를 찾을 수 없습니다."

    content = _read_uploaded_text(file)
    if not content.strip():
        return no_messages

    df = kakao_text_parser(content)
    if df.empty:
        return no_messages

    df["감정"] = analyze_emotions(df["메시지"])

    # Per-sender emotion summary.
    sections = []
    for user, emotions in df.groupby("보낸사람")["감정"]:
        counts = Counter(emotions)
        lines = [f"👤 {user} ({len(emotions)}개 메시지)"]
        lines.extend(
            f" - {emotion}: {count}회" for emotion, count in counts.items()
        )
        # A blank line separates consecutive senders.
        sections.append("\n".join(lines) + "\n\n")

    return "".join(sections)
|
|
|
|
| 82 |
|
| 83 |
+
# ✅ Gradio app setup (Gradio 앱 구성)
|
| 84 |
# Build and start the Gradio UI: one uploaded file in, a plain-text report out.
# NOTE(review): the emoji at the end of `description` was lost in extraction;
# a smiling face is assumed -- confirm against the repository.
# NOTE(review): share=True is kept as in the original; Gradio reportedly
# ignores it when running on Hugging Face Spaces -- confirm whether it is needed.
gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 파일 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 내용을 업로드하면, 참여자별 감정 분포를 분석해드립니다 😊",
).launch(share=True)
|