minjune121 committed on
Commit
0ce3b2b
·
verified ·
1 Parent(s): 8a1d76f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -23
app.py CHANGED
@@ -3,12 +3,16 @@ import pandas as pd
3
  import numpy as np
4
  import torch
5
  from transformers import pipeline
 
6
 
7
  # ===============================
8
- # 모델 로드
9
  # ===============================
10
  device = 0 if torch.cuda.is_available() else -1
11
 
 
 
 
12
  stt_model = pipeline(
13
  "automatic-speech-recognition",
14
  model="openai/whisper-large-v3-turbo",
@@ -19,13 +23,22 @@ emotion_model = pipeline(
19
  "text-classification",
20
  model="monologg/koelectra-base-v3-goemotions",
21
  device=device,
22
- top_k=1
23
  )
24
 
25
- # 데이터 로드
 
 
 
 
26
  df = pd.read_csv("book_db_final.csv")
27
 
 
 
 
 
28
  # 감정 매핑
 
29
  EMOTION_MAP = {
30
  "joy": "기쁨",
31
  "sadness": "슬픔",
@@ -37,19 +50,77 @@ EMOTION_MAP = {
37
  "optimism": "기대"
38
  }
39
 
 
 
40
  # ===============================
41
- # 메인 처리 함수
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # ===============================
43
  def process_voice_only(audio_input):
44
  if audio_input is None:
45
  return {"error": "음성을 녹음해주세요."}
46
 
47
  try:
48
- # STT
49
  sr, y = audio_input
50
  y = y.astype(np.float32)
51
  y /= np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else 1
52
 
 
53
  stt_result = stt_model({"sampling_rate": sr, "raw": y})
54
  final_text = stt_result["text"]
55
 
@@ -57,31 +128,28 @@ def process_voice_only(audio_input):
57
  return {"error": "음성이 인식되지 않았습니다."}
58
 
59
  # 감정 분석
60
- emo_result = emotion_model(final_text)[0][0]
61
- raw_label = emo_result["label"].lower()
62
- best_emo = EMOTION_MAP.get(raw_label, "기대")
63
 
64
- # 추천
65
- recs = df[df["emotion"] == best_emo].head(3)
66
 
67
- books = []
68
- for _, row in recs.iterrows():
69
- books.append({
70
- "title": row["title"],
71
- "url": row["url"],
72
- "contents": str(row.get("contents", ""))[:120]
73
- })
74
 
75
  return {
76
  "text": final_text,
77
  "emotion": best_emo,
 
 
 
 
 
78
  "books": books
79
  }
80
 
81
  except Exception as e:
82
  return {"error": str(e)}
83
 
84
-
85
  # ===============================
86
  # UI
87
  # ===============================
@@ -91,15 +159,15 @@ with gr.Blocks() as demo:
91
  with gr.Row():
92
  with gr.Column():
93
  audio_in = gr.Audio(label="마이크 입력", sources=["microphone"])
94
- submit_btn = gr.Button("분석", variant="primary")
95
 
96
  with gr.Column():
97
- output_json = gr.JSON(label="결과")
98
 
99
- submit_btn.click(
100
  fn=process_voice_only,
101
- inputs=[audio_in],
102
- outputs=output_json
103
  )
104
 
105
  demo.launch()
 
3
  import numpy as np
4
  import torch
5
  from transformers import pipeline
6
+ from sentence_transformers import SentenceTransformer, util
7
 
8
  # ===============================
9
+ # 설정
10
  # ===============================
11
  device = 0 if torch.cuda.is_available() else -1
12
 
13
+ # ===============================
14
+ # 모델 로드
15
+ # ===============================
16
  stt_model = pipeline(
17
  "automatic-speech-recognition",
18
  model="openai/whisper-large-v3-turbo",
 
23
  "text-classification",
24
  model="monologg/koelectra-base-v3-goemotions",
25
  device=device,
26
+ top_k=None
27
  )
28
 
29
+ sbert_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
30
+
31
+ # ===============================
32
+ # 데이터 로드 + 임베딩 캐싱
33
+ # ===============================
34
  df = pd.read_csv("book_db_final.csv")
35
 
36
+ book_texts = df["contents"].fillna(df["title"]).tolist()
37
+ book_embeddings = sbert_model.encode(book_texts, convert_to_tensor=True)
38
+
39
+ # ===============================
40
  # 감정 매핑
41
+ # ===============================
42
  EMOTION_MAP = {
43
  "joy": "기쁨",
44
  "sadness": "슬픔",
 
50
  "optimism": "기대"
51
  }
52
 
53
+ EMOTION_LABELS = ["기쁨","신뢰","공포","놀람","슬픔","혐오","분노","기대"]
54
+
55
  # ===============================
56
+ # 감정 분석
57
+ # ===============================
58
+ def get_emotion_scores(text):
59
+ results = emotion_model(text)[0]
60
+
61
+ scores = {emo: 0.0 for emo in EMOTION_LABELS}
62
+
63
+ # 모델 점수
64
+ for r in results:
65
+ label = r["label"].lower()
66
+ mapped = EMOTION_MAP.get(label)
67
+ if mapped:
68
+ scores[mapped] += r["score"]
69
+
70
+ # 한국어 보정
71
+ t = text.lower()
72
+ if "슬프" in t or "우울" in t:
73
+ scores["슬픔"] += 0.3
74
+ if "화나" in t or "짜증" in t:
75
+ scores["분노"] += 0.3
76
+ if "행복" in t or "좋다" in t:
77
+ scores["기쁨"] += 0.3
78
+
79
+ return scores
80
+
81
+ # ===============================
82
+ # 추천 (SBERT 최적화)
83
+ # ===============================
84
+ def recommend_books(user_text, emotion):
85
+ pool = df[df["emotion"] == emotion]
86
+
87
+ if pool.empty:
88
+ return []
89
+
90
+ idxs = pool.index.tolist()
91
+ pool_embs = book_embeddings[idxs]
92
+
93
+ user_emb = sbert_model.encode(user_text, convert_to_tensor=True)
94
+ sims = util.cos_sim(user_emb, pool_embs)[0].cpu().numpy()
95
+
96
+ pool = pool.copy()
97
+ pool["sim"] = sims
98
+
99
+ pool = pool.sort_values("sim", ascending=False).head(3)
100
+
101
+ books = []
102
+ for _, row in pool.iterrows():
103
+ books.append({
104
+ "title": row["title"],
105
+ "url": row["url"],
106
+ "contents": str(row.get("contents", ""))[:120]
107
+ })
108
+
109
+ return books
110
+
111
+ # ===============================
112
+ # 메인 함수
113
  # ===============================
114
  def process_voice_only(audio_input):
115
  if audio_input is None:
116
  return {"error": "음성을 녹음해주세요."}
117
 
118
  try:
 
119
  sr, y = audio_input
120
  y = y.astype(np.float32)
121
  y /= np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else 1
122
 
123
+ # STT
124
  stt_result = stt_model({"sampling_rate": sr, "raw": y})
125
  final_text = stt_result["text"]
126
 
 
128
  return {"error": "음성이 인식되지 않았습니다."}
129
 
130
  # 감정 분석
131
+ scores = get_emotion_scores(final_text)
 
 
132
 
133
+ best_emo = max(scores, key=scores.get)
134
+ top3 = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:3]
135
 
136
+ # 추천
137
+ books = recommend_books(final_text, best_emo)
 
 
 
 
 
138
 
139
  return {
140
  "text": final_text,
141
  "emotion": best_emo,
142
+ "emotion_scores": {k: round(v, 3) for k, v in scores.items()},
143
+ "top3": [
144
+ {"emotion": e, "score": round(s, 3)}
145
+ for e, s in top3
146
+ ],
147
  "books": books
148
  }
149
 
150
  except Exception as e:
151
  return {"error": str(e)}
152
 
 
153
  # ===============================
154
  # UI
155
  # ===============================
 
159
  with gr.Row():
160
  with gr.Column():
161
  audio_in = gr.Audio(label="마이크 입력", sources=["microphone"])
162
+ btn = gr.Button("분석", variant="primary")
163
 
164
  with gr.Column():
165
+ output = gr.JSON(label="결과")
166
 
167
+ btn.click(
168
  fn=process_voice_only,
169
+ inputs=audio_in,
170
+ outputs=output
171
  )
172
 
173
  demo.launch()