minjune121 committed on
Commit
ca68191
·
verified ·
1 Parent(s): 3cd3e5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -28
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import librosa
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer, util
6
  from transformers import pipeline
7
 
8
- # Load models (runs on the Hugging Face server)
9
- stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny") # ๋น ๋ฅธ ์†๋„๋ฅผ ์œ„ํ•ด tiny ์‚ฌ์šฉ
 
10
  sbert_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
 
 
11
  df = pd.read_csv("book_db_final.csv")
12
 
13
  _EMOTION_DESCS = {
@@ -20,55 +22,59 @@ _EMOTION_DESCS = {
20
  "๋ถ„๋…ธ": "๋ถ„๋…ธ์™€ ์ €ํ•ญ, ํˆฌ์Ÿ๊ณผ ๊ฐˆ๋“ฑ",
21
  "๊ธฐ๋Œ€": "์„ฑ์žฅ๊ณผ ๋„์ „, ๋ชจํ—˜๊ณผ ํฌ๋ง",
22
  }
 
23
  _LABEL_EMBS = sbert_model.encode(list(_EMOTION_DESCS.values()), convert_to_tensor=True)
24
 
25
- def process_voice_and_recommend(text_input, audio_input):
26
- # 1. If audio data is present, convert it to text with STT
27
- final_text = text_input
28
- if audio_input is not None:
29
- sr, y = audio_input
30
- y = y.astype(np.float32)
31
- y /= np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else 1
32
-
33
- # Convert speech to text with the Whisper model
34
- stt_result = stt_model({"sampling_rate": sr, "raw": y})
35
- final_text = stt_result["text"]
 
36
 
37
- if not final_text:
38
- return "ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๊ฑฐ๋‚˜ ์Œ์„ฑ์„ ๋…น์Œํ•ด์ฃผ์„ธ์š”.", ""
39
 
40
- # 2. Emotion analysis
41
  user_emb = sbert_model.encode(final_text, convert_to_tensor=True)
42
  scores = util.cos_sim(user_emb, _LABEL_EMBS)[0]
43
  best_emo = list(_EMOTION_DESCS.keys())[scores.argmax()]
44
 
45
- # 3. Book recommendation
46
  recs = df[df["emotion"] == best_emo].head(3)
47
- result_text = f"๐ŸŽจ ๋ถ„์„๋œ ๋ฌธ์žฅ: \"{final_text}\"\n๐ŸŽญ ๊ฐ์ •: {best_emo}\n\n"
 
48
 
49
  book_list = ""
50
  for _, row in recs.iterrows():
51
- book_list += f"๐Ÿ“– {row['title']}\n๐Ÿ”— {row['url']}\n\n"
52
-
53
  return result_text, book_list
54
 
55
- # Interface layout
56
  with gr.Blocks() as demo:
57
- gr.Markdown("# ๐Ÿ“š Boolook: ์Œ์„ฑ ๊ธฐ๋ฐ˜ ๋งˆ์Œ ๋ถ„์„ ์ฑ… ์ถ”์ฒœ")
58
 
59
  with gr.Row():
60
  with gr.Column():
61
- text_in = gr.Textbox(label="์ง์ ‘ ์ž…๋ ฅ", placeholder="์˜ค๋Š˜ ๊ธฐ๋ถ„์ด ์–ด๋– ์‹ ๊ฐ€์š”?")
62
- audio_in = gr.Audio(label="๋งˆ์ดํฌ ๋…น์Œ", sources=["microphone"])
63
- submit_btn = gr.Button("๋ถ„์„ ๋ฐ ์ถ”์ฒœ๋ฐ›๊ธฐ")
64
 
65
  with gr.Column():
66
  analysis_out = gr.Textbox(label="๋ถ„์„ ๊ฒฐ๊ณผ")
67
  books_out = gr.Textbox(label="์ถ”์ฒœ ๋„์„œ ๋ฆฌ์ŠคํŠธ")
68
 
 
69
  submit_btn.click(
70
- fn=process_voice_and_recommend,
71
- inputs=[text_in, audio_in],
72
  outputs=[analysis_out, books_out]
73
  )
74
 
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  import numpy as np
4
  from sentence_transformers import SentenceTransformer, util
5
  from transformers import pipeline
6
 
7
+ # 1. Load the models and the data
8
+ # Use whisper-tiny for speed
9
+ stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
10
  sbert_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
11
+
12
+ # Load the book database (file name needs to be verified)
13
  df = pd.read_csv("book_db_final.csv")
14
 
15
  _EMOTION_DESCS = {
 
22
  "๋ถ„๋…ธ": "๋ถ„๋…ธ์™€ ์ €ํ•ญ, ํˆฌ์Ÿ๊ณผ ๊ฐˆ๋“ฑ",
23
  "๊ธฐ๋Œ€": "์„ฑ์žฅ๊ณผ ๋„์ „, ๋ชจํ—˜๊ณผ ํฌ๋ง",
24
  }
25
+
26
  # Embed every emotion description once, at import time, so that a request
  # only has to embed the user's sentence and compare it against these vectors.
  _LABEL_EMBS = sbert_model.encode(list(_EMOTION_DESCS.values()), convert_to_tensor=True)
27
 
28
def process_voice_only(audio_input):
    """Transcribe a recording, classify its emotion, and recommend books.

    Args:
        audio_input: ``(sample_rate, samples)`` tuple as produced by the
            microphone ``gr.Audio`` input, or ``None`` when nothing was
            recorded. ``samples`` is a NumPy array; stereo recordings are
            expected as ``(n_samples, n_channels)`` (Gradio's numpy layout).

    Returns:
        A ``(result_text, book_list)`` pair of strings. ``result_text`` holds
        the recognized sentence and the detected emotion (or a prompt to
        record / speak again); ``book_list`` holds up to three
        ``title`` / ``url`` entries and is empty when nothing was analyzed.
    """
    # 1. Make sure there is something to analyze.
    if audio_input is None:
        return "음성을 녹음해주세요.", ""

    # 2. Speech-to-text.
    sr, y = audio_input
    y = np.asarray(y, dtype=np.float32)
    if y.size == 0:
        # A zero-length recording would make np.max() raise below.
        return "음성을 녹음해주세요.", ""
    if y.ndim > 1:
        # The ASR pipeline accepts single-channel audio only: down-mix stereo.
        y = y.mean(axis=1)

    # Peak-normalize to [-1, 1]; pure silence (peak == 0) is left untouched.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    stt_result = stt_model({"sampling_rate": sr, "raw": y})
    final_text = stt_result["text"]

    if not final_text.strip():
        return "음성이 인식되지 않았습니다. 다시 말씀해주세요.", ""

    # 3. Emotion analysis: pick the label whose description embedding is the
    #    most cosine-similar to the transcribed sentence.
    user_emb = sbert_model.encode(final_text, convert_to_tensor=True)
    scores = util.cos_sim(user_emb, _LABEL_EMBS)[0]
    best_emo = list(_EMOTION_DESCS)[int(scores.argmax())]

    # 4. Book recommendation: first three rows tagged with that emotion.
    recs = df[df["emotion"] == best_emo].head(3)

    result_text = f"인식된 문장: \"{final_text}\"\n분석된 감정: {best_emo}\n\n"
    book_list = "".join(
        f"{row['title']}\n {row['url']}\n\n" for _, row in recs.iterrows()
    )
    return result_text, book_list
59
 
60
# --- Gradio UI layout ---
with gr.Blocks() as demo:
    gr.Markdown("# Boolook: 음성 전용 마음 분석 책 추천")

    with gr.Row():
        with gr.Column():
            # Voice is the only input. type="numpy" is spelled out because the
            # click handler unpacks the value as (sample_rate, samples).
            audio_in = gr.Audio(
                label="마이크로 마음을 들려주세요",
                sources=["microphone"],
                type="numpy",
            )
            submit_btn = gr.Button("분석 및 추천받기", variant="primary")

        with gr.Column():
            analysis_out = gr.Textbox(label="분석 결과")
            books_out = gr.Textbox(label="추천 도서 리스트")

    # Wire the button to the voice-processing function. Event listeners have
    # to be registered while the Blocks context is still open.
    submit_btn.click(
        fn=process_voice_only,
        inputs=[audio_in],
        outputs=[analysis_out, books_out],
    )
80