jeongsoo committed on
Commit
59014ef
·
1 Parent(s): c975e83

Fix requirements.txt file

Browse files
Files changed (2) hide show
  1. app/app.py +66 -82
  2. app_gradio.py +40 -19
app/app.py CHANGED
@@ -236,99 +236,83 @@ def chat():
236
 
237
  @app.route('/api/voice', methods=['POST'])
238
  def voice_chat():
239
- """μŒμ„± 기반 챗봇 API"""
240
- global retriever, app_ready
 
 
 
 
 
 
 
 
 
 
241
 
242
- # 앱 준비 상태 확인
243
- if not app_ready:
244
- return jsonify({"error": "앱이 아직 μ΄ˆκΈ°ν™” μ€‘μž…λ‹ˆλ‹€. μž μ‹œ ν›„ λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."}), 503
 
 
 
 
245
 
246
  try:
247
- # 파일이 μš”μ²­μ— ν¬ν•¨λ˜μ–΄ μžˆλŠ”μ§€ 확인
248
- if 'audio' not in request.files:
249
- return jsonify({"error": "μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
250
 
251
- audio_file = request.files['audio']
 
 
252
 
253
- # 파일λͺ…이 λΉ„μ–΄μžˆλŠ”μ§€ 확인
254
- if audio_file.filename == '':
255
- return jsonify({"error": "μ„ νƒλœ 파일이 μ—†μŠ΅λ‹ˆλ‹€."}), 400
 
 
 
256
 
257
- # 파일 ν˜•μ‹ 확인
258
- if not allowed_audio_file(audio_file.filename):
259
- return jsonify({"error": "ν—ˆμš©λ˜μ§€ μ•ŠλŠ” 파일 ν˜•μ‹μž…λ‹ˆλ‹€."}), 400
 
260
 
261
- # μž„μ‹œ 파일둜 μ €μž₯ν•˜μ—¬ 처리
262
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
263
- audio_file.save(temp_file.name)
264
- temp_path = temp_file.name
265
 
266
- try:
267
- # 파일 읽기
268
- with open(temp_path, "rb") as f:
269
- audio_bytes = f.read()
270
-
271
- # STT둜 μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜
272
- logger.info("VITO STT둜 μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ 쀑...")
273
- stt_result = stt_client.transcribe_audio(audio_bytes)
274
-
275
- if not stt_result["success"]:
276
- logger.error(f"STT 처리 μ‹€νŒ¨: {stt_result.get('error', 'Unknown error')}")
277
- return jsonify({
278
- "error": f"μŒμ„± 인식 μ‹€νŒ¨: {stt_result.get('error', 'Unknown error')}"
279
- }), 500
280
-
281
- query = stt_result["text"]
282
- logger.info(f"μΈμ‹λœ 쿼리: {query}")
283
-
284
- if not query:
285
- return jsonify({
286
- "error": "μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•  수 μ—†μŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•˜μ„Έμš”."
287
- }), 400
288
-
289
- # RAG 검색 μˆ˜ν–‰ (μž¬μˆœμœ„ν™” 적용)
290
- search_results = retriever.search(query, top_k=5, first_stage_k=20)
291
-
292
- # 검색 κ²°κ³Όμ—μ„œ μ»¨ν…μŠ€νŠΈ μΆ”μΆœ
293
- context = DocumentProcessor.prepare_rag_context(search_results, field="text")
294
-
295
- if not context:
296
- logger.warning("검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
297
- return jsonify({
298
- "transcription": query,
299
- "answer": "μ£„μ†‘ν•©λ‹ˆλ‹€. κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.",
300
- "sources": []
301
- })
302
-
303
- # LLM에 질의
304
- answer = llm_client.rag_generate(query, context)
305
-
306
- # μ†ŒμŠ€ 정보 μΆ”μΆœ
307
  sources = []
308
- for result in search_results:
309
- if "source" in result:
310
- source_info = {
311
- "source": result.get("source", "Unknown"),
312
- "score": result.get("rerank_score", result.get("score", 0))
313
- }
314
- sources.append(source_info)
315
-
316
- return jsonify({
317
- "transcription": query,
318
- "answer": answer,
319
- "sources": sources
320
- })
321
-
322
- finally:
323
- # μž„μ‹œ 파일 정리
324
- try:
325
- os.unlink(temp_path)
326
- except Exception as e:
327
- logger.error(f"μž„μ‹œ 파일 μ‚­μ œ 쀑 였λ₯˜ λ°œμƒ: {e}")
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  except Exception as e:
330
- logger.error(f"μŒμ„± 처리 쀑 였λ₯˜ λ°œμƒ: {e}", exc_info=True)
331
- return jsonify({"error": f"처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"}), 500
 
 
 
332
 
333
  @app.route('/api/upload', methods=['POST'])
334
  def upload_document():
 
236
 
237
  @app.route('/api/voice', methods=['POST'])
238
  def voice_chat():
239
+ """
240
+ μŒμ„± μ±— API μ—”λ“œν¬μΈνŠΈ: μ˜€λ””μ˜€ νŒŒμΌμ„ λ°›μ•„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜κ³ , μ§ˆλ¬Έμ— λŒ€ν•œ 응닡과 μ†ŒμŠ€λ₯Ό λ°˜ν™˜
241
+
242
+ Returns:
243
+ JSON 응답:
244
+ - transcription: 인식된 텍스트
245
+ - answer: LLM에서 생성한 응답
246
+ - sources: 검색된 문서 소스 (리스트)
247
+ - error: 오류 발생 시 오류 메시지
248
+ - details: 오류 상세 정보 (선택적)
249
+ """
250
+ logger.info("μŒμ„± μ±— μš”μ²­ μˆ˜μ‹ ")
251
 
252
+ # μ˜€λ””μ˜€ 파일 확인
253
+ if 'audio' not in request.files:
254
+ logger.error("μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•ŠμŒ")
255
+ return jsonify({"error": "μ˜€λ””μ˜€ 파일이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."}), 400
256
+
257
+ audio_file = request.files['audio']
258
+ logger.info(f"μˆ˜μ‹ λœ 파일: {audio_file.filename}")
259
 
260
  try:
261
+ # μ˜€λ””μ˜€ 파일 읽기
262
+ with audio_file.stream as f:
263
+ audio_bytes = f.read()
264
 
265
+ # μŒμ„±μΈμ‹ (VitoSTT)
266
+ stt = VitoSTT()
267
+ stt_result = stt.transcribe_audio(audio_bytes, language="ko")
268
 
269
+ if not stt_result["success"]:
270
+ logger.error(f"μŒμ„±μΈμ‹ μ‹€νŒ¨: {stt_result['error']}")
271
+ return jsonify({
272
+ "error": stt_result["error"],
273
+ "details": stt_result.get("details", "")
274
+ }), 500
275
 
276
+ transcription = stt_result["text"]
277
+ if not transcription:
278
+ logger.warning("μŒμ„±μΈμ‹ κ²°κ³Όκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
279
+ return jsonify({"error": "μŒμ„±μ—μ„œ ν…μŠ€νŠΈλ₯Ό μΈμ‹ν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€."}), 400
280
 
281
+ logger.info(f"μŒμ„±μΈμ‹ 성곡: {transcription[:50]}...")
 
 
 
282
 
283
+ # 검색기 호좜: μΈμ‹λœ ν…μŠ€νŠΈλ₯Ό 쿼리둜 μ‚¬μš©
284
+ sources = retriever.search(transcription)
285
+ if not sources:
286
+ logger.warning("κ²€μƒ‰λœ μ†ŒμŠ€κ°€ μ—†μŠ΅λ‹ˆλ‹€.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  sources = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
+ # μ†ŒμŠ€ λ¬Έμ„œ λ‚΄μš©μ„ μ»¨ν…μŠ€νŠΈλ‘œ μ€€λΉ„
290
+ context = "\n".join([doc["content"] for doc in sources])
291
+ logger.info(f"κ²€μƒ‰λœ μ†ŒμŠ€ 수: {len(sources)}")
292
+
293
+ # LLM 호좜: 질문과 μ»¨ν…μŠ€νŠΈλ₯Ό λ°”νƒ•μœΌλ‘œ 응닡 생성
294
+ prompt = f"질문: {transcription}\n\nμ»¨ν…μŠ€νŠΈ:\n{context}\n\nλ‹΅λ³€:"
295
+ answer = llm.generate(prompt)
296
+
297
+ if not answer:
298
+ logger.error("LLM 응닡 생성 μ‹€νŒ¨")
299
+ return jsonify({"error": "응닡 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€."}), 500
300
+
301
+ logger.info(f"LLM 응닡 생성 성곡: {answer[:50]}...")
302
+
303
+ # 응닡 λ°˜ν™˜
304
+ return jsonify({
305
+ "transcription": transcription,
306
+ "answer": answer,
307
+ "sources": sources # [{ "content": "...", "metadata": {...} }, ...]
308
+ })
309
+
310
  except Exception as e:
311
+ logger.error(f"μŒμ„± μ±— 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", exc_info=True)
312
+ return jsonify({
313
+ "error": "μŒμ„± 처리 쀑 λ‚΄λΆ€ 였λ₯˜ λ°œμƒ",
314
+ "details": str(e)
315
+ }), 500
316
 
317
  @app.route('/api/upload', methods=['POST'])
318
  def upload_document():
app_gradio.py CHANGED
@@ -1,11 +1,19 @@
1
  import os
2
  import gradio as gr
3
- import requests
4
  import json
 
 
 
5
 
6
- # Flask 앱은 더 이상 ν…ŒμŠ€νŠΈ ν΄λΌμ΄μ–ΈνŠΈλ‘œ μ‚¬μš©ν•˜μ§€ μ•ŠμŒ
7
- # λŒ€μ‹ , localhost:7860μ—μ„œ μ‹€ν–‰ 쀑인 Flask μ—”λ“œν¬μΈνŠΈμ— HTTP μš”μ²­μ„ 보냄
8
- BASE_URL = "http://localhost:7860"
 
 
 
 
 
 
9
 
10
  # Gradio μΈν„°νŽ˜μ΄μŠ€ 생성
11
  with gr.Blocks(title="RAG 검색 챗봇 with μŒμ„±μΈμ‹", theme=gr.themes.Soft()) as demo:
@@ -39,44 +47,57 @@ with gr.Blocks(title="RAG 검색 챗봇 with μŒμ„±μΈμ‹", theme=gr.themes.Soft
39
  if not query:
40
  return "μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.", ""
41
  try:
42
- response = requests.post(f"{BASE_URL}/api/chat", json={"query": query})
43
- response.raise_for_status()
44
- data = response.json()
45
  if "error" in data:
 
46
  return data["error"], ""
47
  return data["answer"], json.dumps(data["sources"], indent=2)
48
- except requests.RequestException as e:
49
- return f"μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}", ""
 
50
 
51
  # μŒμ„± μ±— κΈ°λŠ₯
52
  def handle_voice_chat(audio_file):
53
  if not audio_file:
54
  return "μŒμ„±μ„ μ—…λ‘œλ“œν•˜μ„Έμš”.", "", ""
55
  try:
 
56
  with open(audio_file, "rb") as f:
57
- response = requests.post(f"{BASE_URL}/api/voice", files={"audio": f})
58
- response.raise_for_status()
59
- data = response.json()
 
 
 
60
  if "error" in data:
 
61
  return "", data["error"], ""
62
  return data["transcription"], data["answer"], json.dumps(data["sources"], indent=2)
63
- except requests.RequestException as e:
64
- return "", f"μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}", ""
 
65
 
66
  # λ¬Έμ„œ μ—…λ‘œλ“œ κΈ°λŠ₯
67
  def handle_doc_upload(doc_file):
68
  if not doc_file:
69
  return "λ¬Έμ„œλ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”."
70
  try:
 
71
  with open(doc_file, "rb") as f:
72
- response = requests.post(f"{BASE_URL}/api/upload", files={"document": f})
73
- response.raise_for_status()
74
- data = response.json()
 
 
75
  if "error" in data:
 
76
  return data["error"]
77
  return data["message"]
78
- except requests.RequestException as e:
79
- return f"μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
 
80
 
81
  # 이벀트 ν•Έλ“€λŸ¬ μ—°κ²°
82
  text_button.click(
 
1
  import os
2
  import gradio as gr
 
3
  import json
4
+ import logging
5
+ from app.app import app as flask_app # Flask μ•± κ°€μ Έμ˜€κΈ°
6
+ from flask import json as flask_json
7
 
8
+ # 둜거 μ„€μ •
9
+ logging.basicConfig(
10
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
11
+ level=logging.INFO
12
+ )
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Flask ν…ŒμŠ€νŠΈ ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™”
16
+ flask_client = flask_app.test_client()
17
 
18
  # Gradio μΈν„°νŽ˜μ΄μŠ€ 생성
19
  with gr.Blocks(title="RAG 검색 챗봇 with μŒμ„±μΈμ‹", theme=gr.themes.Soft()) as demo:
 
47
  if not query:
48
  return "μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.", ""
49
  try:
50
+ logger.info("ν…μŠ€νŠΈ μ±— μš”μ²­: /api/chat")
51
+ response = flask_client.post("/api/chat", json={"query": query})
52
+ data = flask_json.loads(response.data)
53
  if "error" in data:
54
+ logger.error(f"ν…μŠ€νŠΈ μ±— 였λ₯˜: {data['error']}")
55
  return data["error"], ""
56
  return data["answer"], json.dumps(data["sources"], indent=2)
57
+ except Exception as e:
58
+ logger.error(f"ν…μŠ€νŠΈ μ±— 처리 μ‹€νŒ¨: {str(e)}")
59
+ return f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", ""
60
 
61
  # μŒμ„± μ±— κΈ°λŠ₯
62
  def handle_voice_chat(audio_file):
63
  if not audio_file:
64
  return "μŒμ„±μ„ μ—…λ‘œλ“œν•˜μ„Έμš”.", "", ""
65
  try:
66
+ logger.info("μŒμ„± μ±— μš”μ²­: /api/voice")
67
  with open(audio_file, "rb") as f:
68
+ # Flask 테스트 클라이언트는 files 직접 지원 안 하므로, 데이터를 읽어 전달
69
+ response = flask_client.post(
70
+ "/api/voice",
71
+ data={"audio": (f, "audio_file")}
72
+ )
73
+ data = flask_json.loads(response.data)
74
  if "error" in data:
75
+ logger.error(f"μŒμ„± μ±— 였λ₯˜: {data['error']}")
76
  return "", data["error"], ""
77
  return data["transcription"], data["answer"], json.dumps(data["sources"], indent=2)
78
+ except Exception as e:
79
+ logger.error(f"μŒμ„± μ±— 처리 μ‹€νŒ¨: {str(e)}")
80
+ return "", f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}", ""
81
 
82
  # λ¬Έμ„œ μ—…λ‘œλ“œ κΈ°λŠ₯
83
  def handle_doc_upload(doc_file):
84
  if not doc_file:
85
  return "λ¬Έμ„œλ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”."
86
  try:
87
+ logger.info("λ¬Έμ„œ μ—…λ‘œλ“œ μš”μ²­: /api/upload")
88
  with open(doc_file, "rb") as f:
89
+ response = flask_client.post(
90
+ "/api/upload",
91
+ data={"document": (f, "document_file")}
92
+ )
93
+ data = flask_json.loads(response.data)
94
  if "error" in data:
95
+ logger.error(f"λ¬Έμ„œ μ—…λ‘œλ“œ 였λ₯˜: {data['error']}")
96
  return data["error"]
97
  return data["message"]
98
+ except Exception as e:
99
+ logger.error(f"λ¬Έμ„œ μ—…λ‘œλ“œ 처리 μ‹€νŒ¨: {str(e)}")
100
+ return f"처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}"
101
 
102
  # 이벀트 ν•Έλ“€λŸ¬ μ—°κ²°
103
  text_button.click(