Fix requirements.txt file
Browse files
- app/app.py +66 -82
- app_gradio.py +40 -19
app/app.py
CHANGED
|
@@ -236,99 +236,83 @@ def chat():
|
|
| 236 |
|
| 237 |
@app.route('/api/voice', methods=['POST'])
|
| 238 |
def voice_chat():
|
| 239 |
-
"""
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
#
|
| 243 |
-
if not
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
try:
|
| 247 |
-
# νμΌ
|
| 248 |
-
|
| 249 |
-
|
| 250 |
|
| 251 |
-
|
|
|
|
|
|
|
| 252 |
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
-
|
| 258 |
-
if not
|
| 259 |
-
|
|
|
|
| 260 |
|
| 261 |
-
|
| 262 |
-
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
| 263 |
-
audio_file.save(temp_file.name)
|
| 264 |
-
temp_path = temp_file.name
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
# STT로 음성을 텍스트로 변환
|
| 272 |
-
logger.info("VITO STTλ‘ μμ±μ ν
μ€νΈλ‘ λ³ν μ€...")
|
| 273 |
-
stt_result = stt_client.transcribe_audio(audio_bytes)
|
| 274 |
-
|
| 275 |
-
if not stt_result["success"]:
|
| 276 |
-
logger.error(f"STT μ²λ¦¬ μ€ν¨: {stt_result.get('error', 'Unknown error')}")
|
| 277 |
-
return jsonify({
|
| 278 |
-
"error": f"μμ± μΈμ μ€ν¨: {stt_result.get('error', 'Unknown error')}"
|
| 279 |
-
}), 500
|
| 280 |
-
|
| 281 |
-
query = stt_result["text"]
|
| 282 |
-
logger.info(f"μΈμλ 쿼리: {query}")
|
| 283 |
-
|
| 284 |
-
if not query:
|
| 285 |
-
return jsonify({
|
| 286 |
-
"error": "μμ±μ ν
μ€νΈλ‘ λ³νν μ μμ΅λλ€. λ€μ μλνμΈμ."
|
| 287 |
-
}), 400
|
| 288 |
-
|
| 289 |
-
# RAG 검색 수행 (재순위화 적용)
|
| 290 |
-
search_results = retriever.search(query, top_k=5, first_stage_k=20)
|
| 291 |
-
|
| 292 |
-
# 검색 결과에서 컨텍스트 추출
|
| 293 |
-
context = DocumentProcessor.prepare_rag_context(search_results, field="text")
|
| 294 |
-
|
| 295 |
-
if not context:
|
| 296 |
-
logger.warning("κ²μ κ²°κ³Όκ° μμ΅λλ€.")
|
| 297 |
-
return jsonify({
|
| 298 |
-
"transcription": query,
|
| 299 |
-
"answer": "μ£μ‘ν©λλ€. κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€.",
|
| 300 |
-
"sources": []
|
| 301 |
-
})
|
| 302 |
-
|
| 303 |
-
# LLM에 질의
|
| 304 |
-
answer = llm_client.rag_generate(query, context)
|
| 305 |
-
|
| 306 |
-
# 소스 정보 추출
|
| 307 |
sources = []
|
| 308 |
-
for result in search_results:
|
| 309 |
-
if "source" in result:
|
| 310 |
-
source_info = {
|
| 311 |
-
"source": result.get("source", "Unknown"),
|
| 312 |
-
"score": result.get("rerank_score", result.get("score", 0))
|
| 313 |
-
}
|
| 314 |
-
sources.append(source_info)
|
| 315 |
-
|
| 316 |
-
return jsonify({
|
| 317 |
-
"transcription": query,
|
| 318 |
-
"answer": answer,
|
| 319 |
-
"sources": sources
|
| 320 |
-
})
|
| 321 |
-
|
| 322 |
-
finally:
|
| 323 |
-
# 임시 파일 정리
|
| 324 |
-
try:
|
| 325 |
-
os.unlink(temp_path)
|
| 326 |
-
except Exception as e:
|
| 327 |
-
logger.error(f"μμ νμΌ μμ μ€ μ€λ₯ λ°μ: {e}")
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
except Exception as e:
|
| 330 |
-
logger.error(f"μμ± μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}", exc_info=True)
|
| 331 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
@app.route('/api/upload', methods=['POST'])
|
| 334 |
def upload_document():
|
|
|
|
| 236 |
|
| 237 |
@app.route('/api/voice', methods=['POST'])
|
| 238 |
def voice_chat():
|
| 239 |
+
"""
|
| 240 |
+
음성 챗 API 엔드포인트: 오디오 파일을 받아 텍스트로 변환하고, 질문에 대한 응답과 소스를 반환
|
| 241 |
+
|
| 242 |
+
Returns:
|
| 243 |
+
JSON 응답:
|
| 244 |
+
- transcription: 인식된 텍스트
|
| 245 |
+
- answer: LLM에서 생성한 응답
|
| 246 |
+
- sources: 검색된 문서 소스 (리스트)
|
| 247 |
+
- error: 오류 발생 시 오류 메시지
|
| 248 |
+
- details: 오류 상세 정보 (선택적)
|
| 249 |
+
"""
|
| 250 |
+
logger.info("μμ± μ± μμ² μμ ")
|
| 251 |
|
| 252 |
+
# 오디오 파일 확인
|
| 253 |
+
if 'audio' not in request.files:
|
| 254 |
+
logger.error("μ€λμ€ νμΌμ΄ μ 곡λμ§ μμ")
|
| 255 |
+
return jsonify({"error": "μ€λμ€ νμΌμ΄ μ 곡λμ§ μμμ΅λλ€."}), 400
|
| 256 |
+
|
| 257 |
+
audio_file = request.files['audio']
|
| 258 |
+
logger.info(f"μμ λ νμΌ: {audio_file.filename}")
|
| 259 |
|
| 260 |
try:
|
| 261 |
+
# 오디오 파일 읽기
|
| 262 |
+
with audio_file.stream as f:
|
| 263 |
+
audio_bytes = f.read()
|
| 264 |
|
| 265 |
+
# 음성인식 (VitoSTT)
|
| 266 |
+
stt = VitoSTT()
|
| 267 |
+
stt_result = stt.transcribe_audio(audio_bytes, language="ko")
|
| 268 |
|
| 269 |
+
if not stt_result["success"]:
|
| 270 |
+
logger.error(f"μμ±μΈμ μ€ν¨: {stt_result['error']}")
|
| 271 |
+
return jsonify({
|
| 272 |
+
"error": stt_result["error"],
|
| 273 |
+
"details": stt_result.get("details", "")
|
| 274 |
+
}), 500
|
| 275 |
|
| 276 |
+
transcription = stt_result["text"]
|
| 277 |
+
if not transcription:
|
| 278 |
+
logger.warning("μμ±μΈμ κ²°κ³Όκ° λΉμ΄μμ΅λλ€.")
|
| 279 |
+
return jsonify({"error": "μμ±μμ ν
μ€νΈλ₯Ό μΈμνμ§ λͺ»νμ΅λλ€."}), 400
|
| 280 |
|
| 281 |
+
logger.info(f"μμ±μΈμ μ±κ³΅: {transcription[:50]}...")
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
+
# 검색기 호출: 인식된 텍스트를 쿼리로 사용
|
| 284 |
+
sources = retriever.search(transcription)
|
| 285 |
+
if not sources:
|
| 286 |
+
logger.warning("κ²μλ μμ€κ° μμ΅λλ€.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
sources = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
+
# 소스 문서 내용을 컨텍스트로 준비
|
| 290 |
+
context = "\n".join([doc["content"] for doc in sources])
|
| 291 |
+
logger.info(f"κ²μλ μμ€ μ: {len(sources)}")
|
| 292 |
+
|
| 293 |
+
# LLM 호출: 질문과 컨텍스트를 바탕으로 응답 생성
|
| 294 |
+
prompt = f"μ§λ¬Έ: {transcription}\n\n컨ν
μ€νΈ:\n{context}\n\nλ΅λ³:"
|
| 295 |
+
answer = llm.generate(prompt)
|
| 296 |
+
|
| 297 |
+
if not answer:
|
| 298 |
+
logger.error("LLM μλ΅ μμ± μ€ν¨")
|
| 299 |
+
return jsonify({"error": "μλ΅ μμ±μ μ€ν¨νμ΅λλ€."}), 500
|
| 300 |
+
|
| 301 |
+
logger.info(f"LLM μλ΅ μμ± μ±κ³΅: {answer[:50]}...")
|
| 302 |
+
|
| 303 |
+
# 응답 반환
|
| 304 |
+
return jsonify({
|
| 305 |
+
"transcription": transcription,
|
| 306 |
+
"answer": answer,
|
| 307 |
+
"sources": sources # [{ "content": "...", "metadata": {...} }, ...]
|
| 308 |
+
})
|
| 309 |
+
|
| 310 |
except Exception as e:
|
| 311 |
+
logger.error(f"μμ± μ± μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}", exc_info=True)
|
| 312 |
+
return jsonify({
|
| 313 |
+
"error": "μμ± μ²λ¦¬ μ€ λ΄λΆ μ€λ₯ λ°μ",
|
| 314 |
+
"details": str(e)
|
| 315 |
+
}), 500
|
| 316 |
|
| 317 |
@app.route('/api/upload', methods=['POST'])
|
| 318 |
def upload_document():
|
app_gradio.py
CHANGED
|
@@ -1,11 +1,19 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
-
import requests
|
| 4 |
import json
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Gradio 인터페이스 생성
|
| 11 |
with gr.Blocks(title="RAG κ²μ μ±λ΄ with μμ±μΈμ", theme=gr.themes.Soft()) as demo:
|
|
@@ -39,44 +47,57 @@ with gr.Blocks(title="RAG κ²μ μ±λ΄ with μμ±μΈμ", theme=gr.themes.Soft
|
|
| 39 |
if not query:
|
| 40 |
return "μ§λ¬Έμ μ
λ ₯νμΈμ.", ""
|
| 41 |
try:
|
| 42 |
-
|
| 43 |
-
response.
|
| 44 |
-
data =
|
| 45 |
if "error" in data:
|
|
|
|
| 46 |
return data["error"], ""
|
| 47 |
return data["answer"], json.dumps(data["sources"], indent=2)
|
| 48 |
-
except
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
# 음성 챗 기능
|
| 52 |
def handle_voice_chat(audio_file):
|
| 53 |
if not audio_file:
|
| 54 |
return "μμ±μ μ
λ‘λνμΈμ.", "", ""
|
| 55 |
try:
|
|
|
|
| 56 |
with open(audio_file, "rb") as f:
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
| 60 |
if "error" in data:
|
|
|
|
| 61 |
return "", data["error"], ""
|
| 62 |
return data["transcription"], data["answer"], json.dumps(data["sources"], indent=2)
|
| 63 |
-
except
|
| 64 |
-
|
|
|
|
| 65 |
|
| 66 |
# 문서 업로드 기능
|
| 67 |
def handle_doc_upload(doc_file):
|
| 68 |
if not doc_file:
|
| 69 |
return "λ¬Έμλ₯Ό μ
λ‘λνμΈμ."
|
| 70 |
try:
|
|
|
|
| 71 |
with open(doc_file, "rb") as f:
|
| 72 |
-
response =
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
if "error" in data:
|
|
|
|
| 76 |
return data["error"]
|
| 77 |
return data["message"]
|
| 78 |
-
except
|
| 79 |
-
|
|
|
|
| 80 |
|
| 81 |
# 이벤트 핸들러 연결
|
| 82 |
text_button.click(
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
import json
|
| 4 |
+
import logging
|
| 5 |
+
from app.app import app as flask_app # Flask μ± κ°μ Έμ€κΈ°
|
| 6 |
+
from flask import json as flask_json
|
| 7 |
|
| 8 |
+
# 로거 설정
|
| 9 |
+
logging.basicConfig(
|
| 10 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 11 |
+
level=logging.INFO
|
| 12 |
+
)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
# Flask 테스트 클라이언트 초기화
|
| 16 |
+
flask_client = flask_app.test_client()
|
| 17 |
|
| 18 |
# Gradio 인터페이스 생성
|
| 19 |
with gr.Blocks(title="RAG κ²μ μ±λ΄ with μμ±μΈμ", theme=gr.themes.Soft()) as demo:
|
|
|
|
| 47 |
if not query:
|
| 48 |
return "μ§λ¬Έμ μ
λ ₯νμΈμ.", ""
|
| 49 |
try:
|
| 50 |
+
logger.info("ν
μ€νΈ μ± μμ²: /api/chat")
|
| 51 |
+
response = flask_client.post("/api/chat", json={"query": query})
|
| 52 |
+
data = flask_json.loads(response.data)
|
| 53 |
if "error" in data:
|
| 54 |
+
logger.error(f"ν
μ€νΈ μ± μ€λ₯: {data['error']}")
|
| 55 |
return data["error"], ""
|
| 56 |
return data["answer"], json.dumps(data["sources"], indent=2)
|
| 57 |
+
except Exception as e:
|
| 58 |
+
logger.error(f"ν
μ€νΈ μ± μ²λ¦¬ μ€ν¨: {str(e)}")
|
| 59 |
+
return f"μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}", ""
|
| 60 |
|
| 61 |
# 음성 챗 기능
|
| 62 |
def handle_voice_chat(audio_file):
|
| 63 |
if not audio_file:
|
| 64 |
return "μμ±μ μ
λ‘λνμΈμ.", "", ""
|
| 65 |
try:
|
| 66 |
+
logger.info("μμ± μ± μμ²: /api/voice")
|
| 67 |
with open(audio_file, "rb") as f:
|
| 68 |
+
# Flask 테스트 클라이언트는 files 직접 지원 안 하므로, 데이터를 읽어 전달
|
| 69 |
+
response = flask_client.post(
|
| 70 |
+
"/api/voice",
|
| 71 |
+
data={"audio": (f, "audio_file")}
|
| 72 |
+
)
|
| 73 |
+
data = flask_json.loads(response.data)
|
| 74 |
if "error" in data:
|
| 75 |
+
logger.error(f"μμ± μ± μ€λ₯: {data['error']}")
|
| 76 |
return "", data["error"], ""
|
| 77 |
return data["transcription"], data["answer"], json.dumps(data["sources"], indent=2)
|
| 78 |
+
except Exception as e:
|
| 79 |
+
logger.error(f"μμ± μ± μ²λ¦¬ μ€ν¨: {str(e)}")
|
| 80 |
+
return "", f"μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}", ""
|
| 81 |
|
| 82 |
# 문서 업로드 기능
|
| 83 |
def handle_doc_upload(doc_file):
|
| 84 |
if not doc_file:
|
| 85 |
return "λ¬Έμλ₯Ό μ
λ‘λνμΈμ."
|
| 86 |
try:
|
| 87 |
+
logger.info("λ¬Έμ μ
λ‘λ μμ²: /api/upload")
|
| 88 |
with open(doc_file, "rb") as f:
|
| 89 |
+
response = flask_client.post(
|
| 90 |
+
"/api/upload",
|
| 91 |
+
data={"document": (f, "document_file")}
|
| 92 |
+
)
|
| 93 |
+
data = flask_json.loads(response.data)
|
| 94 |
if "error" in data:
|
| 95 |
+
logger.error(f"λ¬Έμ μ
λ‘λ μ€λ₯: {data['error']}")
|
| 96 |
return data["error"]
|
| 97 |
return data["message"]
|
| 98 |
+
except Exception as e:
|
| 99 |
+
logger.error(f"λ¬Έμ μ
λ‘λ μ²λ¦¬ μ€ν¨: {str(e)}")
|
| 100 |
+
return f"μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}"
|
| 101 |
|
| 102 |
# 이벤트 핸들러 연결
|
| 103 |
text_button.click(
|