AmiKim committed on
Commit
6b16f50
·
verified ·
1 Parent(s): bc1ee0f

error fix

Browse files
Files changed (1) hide show
  1. app.py +47 -28
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import gradio as gr
 
 
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import torch.nn.functional as F
5
- import re
6
- from collections import defaultdict, Counter
7
 
8
- # 모델 불러오기
9
  model_name = "hun3359/mdistilbertV3.1-sentiment"
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
@@ -15,20 +16,32 @@ if hasattr(model.config, "id2label"):
15
  else:
16
  labels = ['๊ธฐ์จ', '๋ถ„๋…ธ', '๋ถˆ์•ˆ', '์Šฌํ””', '์ค‘๋ฆฝ']
17
 
18
- # 카카오톡 대화 파싱
19
def parse_kakao_chat(text):
    """Group message bodies by sender.

    Each line is stripped and matched against ``[sender] [time] message``;
    lines that do not match are ignored.

    Args:
        text: Chat export contents as one string.

    Returns:
        A ``defaultdict(list)`` mapping each sender to that sender's
        messages, in the order they appear in ``text``.
    """
    line_re = re.compile(r'^\[(.+?)\] \[.+?\] (.+)$')
    grouped = defaultdict(list)
    stripped_lines = (raw.strip() for raw in text.splitlines())
    # ``match`` returns None for non-message lines; filter(None, ...) drops them.
    for found in filter(None, map(line_re.match, stripped_lines)):
        sender, body = found.groups()
        grouped[sender].append(body)
    return grouped
29
-
 
 
 
 
 
 
 
 
30
 
31
- # 감정 분석 함수
32
  def analyze_emotions(messages):
33
  emotions = []
34
  for msg in messages:
@@ -40,32 +53,38 @@ def analyze_emotions(messages):
40
  emotions.append(labels[pred])
41
  return emotions
42
 
43
- # 전체 분석 파이프라인
44
def process_kakao_file(file):
    """Summarise, per sender, how often each emotion occurs in an upload.

    Args:
        file: The uploaded chat export. Accepted forms: an object with a
            ``read()`` method, raw bytes, a ``str`` naming an existing file,
            a ``str`` that already holds the chat text, or ``None`` when
            nothing was uploaded.

    Returns:
        One section per sender (message count followed by one line per
        emotion and its count), or a fixed notice when no message line was
        found.
    """
    import os  # function-scope import: keeps this block self-contained

    if file is None:
        raw = ""
    elif hasattr(file, "read"):
        raw = file.read()
    elif isinstance(file, str) and os.path.isfile(file):
        # NOTE(review): the original comment says Hugging Face Spaces passes
        # a NamedString (a str). That value is assumed to be the path of the
        # uploaded file, so it is opened instead of being parsed as chat
        # text - confirm against the Gradio File component docs.
        with open(file, "rb") as handle:
            raw = handle.read()
    else:
        raw = file
    # read() may return str (text mode) or bytes; "utf-8-sig" decodes plain
    # UTF-8 as well and drops a leading byte-order mark.
    if isinstance(raw, (bytes, bytearray)):
        content = raw.decode("utf-8-sig")
    else:
        content = raw

    sections = []
    for user, messages in parse_kakao_chat(content).items():
        counts = Counter(analyze_emotions(messages))
        tally = "".join(
            f" - {emotion}: {count}회\n" for emotion, count in counts.items()
        )
        sections.append(f"👤 {user} ({len(messages)}개 메시지)\n{tally}\n")

    return "".join(sections) or "유효한 메시지가 없습니다."
62
-
63
 
64
- # Gradio UI 구성
65
# Build the single-file upload UI around process_kakao_file and start
# serving it. The user-facing Korean strings are restored from their
# mis-encoded form.
demo = gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 txt 파일 업로드 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 파일을 업로드하면 참여자별 감정 분포를 분석해줍니다. 😊",
)
demo.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import re
4
+ from collections import Counter
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
  import torch
7
  import torch.nn.functional as F
 
 
8
 
9
+ # ✅ 감정 분석 모델 준비
10
  model_name = "hun3359/mdistilbertV3.1-sentiment"
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
16
  else:
17
  labels = ['๊ธฐ์จ', '๋ถ„๋…ธ', '๋ถˆ์•ˆ', '์Šฌํ””', '์ค‘๋ฆฝ']
18
 
19
+ # ✅ 카카오톡 파싱 함수
20
def kakao_text_parser(text):
    """Parse a KakaoTalk text export into one DataFrame row per message.

    A date separator line such as
    ``--------------- 2024년 1월 5일 금요일 ---------------`` sets the date
    attached to every message that follows it. A message line has the form
    ``[sender] [time] message``. Lines matching neither pattern are ignored,
    as are message lines that appear before the first date separator.

    Args:
        text: Full contents of the exported chat as a single string.

    Returns:
        pandas.DataFrame with the columns "날짜" (date), "보낸사람" (sender),
        "시간" (time) and "메시지" (message). The columns are present even
        when no message was found.
    """
    columns = ["날짜", "보낸사람", "시간", "메시지"]
    # The Korean literals below are restored from their mis-encoded form so
    # the pattern can match the date headers of a real export.
    date_pattern = re.compile(r"-{7,} (\d{4}년 \d{1,2}월 \d{1,2}일 .요일) -{7,}")
    msg_pattern = re.compile(r"\[(.*?)\] \[(.*?)\] (.+)")

    rows = []
    current_date = None
    for raw_line in text.splitlines():
        line = raw_line.strip()

        date_match = date_pattern.match(line)
        if date_match:
            current_date = date_match.group(1)
            continue

        # Messages are only kept once a date separator has been seen.
        if current_date is None:
            continue

        msg_match = msg_pattern.match(line)
        if msg_match:
            sender, sent_at, message = msg_match.groups()
            rows.append((current_date, sender, sent_at, message))

    # Passing ``columns`` explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)
43
 
44
+ # ✅ 감정 분석 함수
45
  def analyze_emotions(messages):
46
  emotions = []
47
  for msg in messages:
 
53
  emotions.append(labels[pred])
54
  return emotions
55
 
56
+ # ✅ 전체 분석 함수
57
def _read_uploaded_text(file):
    """Return the chat text of an upload given as file object, bytes, path or str."""
    import os  # function-scope import: keeps this block self-contained

    if file is None:
        # Nothing was uploaded; an empty chat yields the "no messages" notice.
        return ""
    if hasattr(file, "read"):
        raw = file.read()
    elif isinstance(file, str) and os.path.isfile(file):
        # NOTE(review): the original comment says Hugging Face Spaces passes
        # a NamedString (a str). That value is assumed to be the path of the
        # uploaded file, so it is opened instead of being parsed as chat
        # text - confirm against the Gradio File component docs.
        with open(file, "rb") as handle:
            raw = handle.read()
    else:
        raw = file
    # read() may return str (text mode) or bytes; "utf-8-sig" decodes plain
    # UTF-8 as well and drops a leading byte-order mark.
    if isinstance(raw, (bytes, bytearray)):
        return raw.decode("utf-8-sig")
    return raw


def process_kakao_file(file):
    """Summarise, per sender, how often each emotion occurs in an upload.

    Args:
        file: The uploaded chat export. Accepted forms: an object with a
            ``read()`` method, raw bytes, a ``str`` naming an existing file,
            a ``str`` that already holds the chat text, or ``None`` when
            nothing was uploaded.

    Returns:
        One section per sender (message count followed by one line per
        emotion and its count), or a fixed notice when the parser found no
        message.
    """
    df = kakao_text_parser(_read_uploaded_text(file))
    if df.empty:
        return "❌ 유효한 메시지를 찾을 수 없습니다."

    df["감정"] = analyze_emotions(df["메시지"])

    # Per-sender emotion summary.
    parts = []
    for user, emotions in df.groupby("보낸사람")["감정"]:
        parts.append(f"👤 {user} ({len(emotions)}개 메시지)\n")
        parts.extend(
            f" - {emotion}: {count}회\n"
            for emotion, count in Counter(emotions).items()
        )
        parts.append("\n")
    return "".join(parts)
 
82
 
83
+ # ✅ Gradio 앱 구성
84
# Build the single-file upload UI around process_kakao_file and start
# serving it. The user-facing Korean strings are restored from their
# mis-encoded form.
demo = gr.Interface(
    fn=process_kakao_file,
    inputs=gr.File(label="카카오톡 대화 파일 (.txt)"),
    outputs="text",
    title="카카오톡 감정 분석기",
    description="카카오톡 대화 내용을 업로드하면, 참여자별 감정 분포를 분석해드립니다 😊",
)
demo.launch(share=True)