minjune121 committed on
Commit
b9098cc
·
verified ·
1 Parent(s): ca68191

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -41
app.py CHANGED
@@ -1,77 +1,89 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- from sentence_transformers import SentenceTransformer, util
5
  from transformers import pipeline
6
 
7
- # 1. 모델 및 데이터 로드
8
- # 빠른 속도를 위해 whisper-tiny 사용
9
- stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
10
- sbert_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
 
 
 
 
11
 
12
- # 데이터베이스 로드 (파일명 확인 필요)
 
 
 
 
 
 
 
13
  df = pd.read_csv("book_db_final.csv")
14
 
15
- _EMOTION_DESCS = {
16
- "기쁨": "행복하고 즐겁고 유쾌한 기분",
17
- "신뢰": "따뜻하고 안정적이며 가족과 우정 같은 유대감",
18
- "공포": "무섭고 긴장되며 스릴 있는 공포와 불안",
19
- "놀람": "반전과 충격, 예상치 못한 경이로움",
20
- "슬픔": "슬프고 외롭고 이별과 상실의 감정",
21
- "혐오": "부조리와 불평등, 위선에 대한 비판과 풍자",
22
- "분노": "분노와 저항, 투쟁과 갈등",
23
- "기대": "성장과 도전, 모험과 희망",
 
24
  }
25
 
26
- _LABEL_EMBS = sbert_model.encode(list(_EMOTION_DESCS.values()), convert_to_tensor=True)
27
-
 
28
  def process_voice_only(audio_input):
29
- # 1. 음성 입력 확인
30
  if audio_input is None:
31
  return "음성을 녹음해주세요.", ""
32
 
33
- # 2. STT (Speech to Text) 변환
34
  sr, y = audio_input
35
  y = y.astype(np.float32)
36
  y /= np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else 1
37
-
38
  stt_result = stt_model({"sampling_rate": sr, "raw": y})
39
  final_text = stt_result["text"]
40
 
41
  if not final_text.strip():
42
- return "음성이 인식되지 않았습니다. 다시 말씀해주세요.", ""
43
-
44
- # 3. 감정 분석
45
- user_emb = sbert_model.encode(final_text, convert_to_tensor=True)
46
- scores = util.cos_sim(user_emb, _LABEL_EMBS)[0]
47
- best_emo = list(_EMOTION_DESCS.keys())[scores.argmax()]
48
-
49
- # 4. 도서 추천
50
  recs = df[df["emotion"] == best_emo].head(3)
51
-
52
  result_text = f"인식된 문장: \"{final_text}\"\n분석된 감정: {best_emo}\n\n"
53
-
54
  book_list = ""
55
  for _, row in recs.iterrows():
56
- book_list += f"{row['title']}\n {row['url']}\n\n"
57
-
58
  return result_text, book_list
59
 
60
- # --- Gradio UI 구성 ---
 
 
 
61
  with gr.Blocks() as demo:
62
- gr.Markdown("# Boolook: 음성 전용 마음 분석 책 추천")
63
-
64
  with gr.Row():
65
  with gr.Column():
66
- # 오디오 입력만 유지
67
- audio_in = gr.Audio(label="마이크로 마음을 들려주세요", sources=["microphone"])
68
- submit_btn = gr.Button("분석 및 추천받기", variant="primary")
69
-
70
  with gr.Column():
71
  analysis_out = gr.Textbox(label="분석 결과")
72
- books_out = gr.Textbox(label="추천 도서 리스트")
73
 
74
- # 버튼 클릭 시 음성 처리 함수 연결
75
  submit_btn.click(
76
  fn=process_voice_only,
77
  inputs=[audio_in],
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
4
  from transformers import pipeline
5
 
6
+ # ===============================
7
+ # 모델 로드
8
+ # ===============================
9
+ stt_model = pipeline(
10
+ "automatic-speech-recognition",
11
+ model="openai/whisper-large-v3-turbo",
12
+ device=0
13
+ )
14
 
15
+ emotion_model = pipeline(
16
+ "text-classification",
17
+ model="monologg/koelectra-base-v3-goemotions",
18
+ device=0,
19
+ top_k=1
20
+ )
21
+
22
+ # 데이터 로드
23
  df = pd.read_csv("book_db_final.csv")
24
 
25
+ # 감정 매핑 (Plutchik 유지)
26
+ EMOTION_MAP = {
27
+ "joy": "기쁨",
28
+ "sadness": "슬픔",
29
+ "anger": "분노",
30
+ "fear": "공포",
31
+ "surprise": "놀람",
32
+ "disgust": "혐오",
33
+ "love": "신뢰",
34
+ "optimism": "기대"
35
  }
36
 
37
+ # ===============================
38
+ # 메인 처리 함수
39
+ # ===============================
40
  def process_voice_only(audio_input):
 
41
  if audio_input is None:
42
  return "음성을 녹음해주세요.", ""
43
 
44
+ # STT
45
  sr, y = audio_input
46
  y = y.astype(np.float32)
47
  y /= np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else 1
48
+
49
  stt_result = stt_model({"sampling_rate": sr, "raw": y})
50
  final_text = stt_result["text"]
51
 
52
  if not final_text.strip():
53
+ return "음성이 인식되지 않았습니다.", ""
54
+
55
+ # 감정 분석 (classifier 사용)
56
+ emo_result = emotion_model(final_text)[0][0]
57
+ raw_label = emo_result["label"].lower()
58
+ best_emo = EMOTION_MAP.get(raw_label, "기대")
59
+
60
+ # 추천
61
  recs = df[df["emotion"] == best_emo].head(3)
62
+
63
  result_text = f"인식된 문장: \"{final_text}\"\n분석된 감정: {best_emo}\n\n"
64
+
65
  book_list = ""
66
  for _, row in recs.iterrows():
67
+ book_list += f"{row['title']}\n{row['url']}\n\n"
68
+
69
  return result_text, book_list
70
 
71
+
72
+ # ===============================
73
+ # UI
74
+ # ===============================
75
  with gr.Blocks() as demo:
76
+ gr.Markdown("# Boolook: 음성 기반 감정 분석 책 추천")
77
+
78
  with gr.Row():
79
  with gr.Column():
80
+ audio_in = gr.Audio(label="마이크 입력", sources=["microphone"])
81
+ submit_btn = gr.Button("분석", variant="primary")
82
+
 
83
  with gr.Column():
84
  analysis_out = gr.Textbox(label="분석 결과")
85
+ books_out = gr.Textbox(label="추천 도서")
86
 
 
87
  submit_btn.click(
88
  fn=process_voice_only,
89
  inputs=[audio_in],