Spaces:
Sleeping
Sleeping
Commit
·
d8a190c
1
Parent(s):
51277f6
Resolving docker error
Browse files
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
from doctr.models import ocr_predictor
|
| 3 |
from PIL import Image
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
| 6 |
-
from
|
| 7 |
from langchain_core.documents import Document
|
| 8 |
import os
|
| 9 |
from groq import Groq
|
|
@@ -302,7 +301,7 @@ def query_qdrant_store(query_text, k=3):
|
|
| 302 |
retrieved_docs.append({
|
| 303 |
"source": doc.metadata['source'],
|
| 304 |
"content": doc.page_content,
|
| 305 |
-
"score": score,
|
| 306 |
"metadata": doc.metadata
|
| 307 |
})
|
| 308 |
return retrieved_docs
|
|
@@ -412,26 +411,38 @@ def add_documents_to_qdrant(docs):
|
|
| 412 |
raise
|
| 413 |
|
| 414 |
# -------------------------------
|
| 415 |
-
# Flask App
|
| 416 |
# -------------------------------
|
| 417 |
-
|
| 418 |
|
| 419 |
-
@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
def handle_query():
|
| 421 |
data = request.get_json()
|
| 422 |
-
query = data.get('query', '')
|
| 423 |
-
k = data.get('k', 3)
|
| 424 |
|
| 425 |
-
if not query:
|
| 426 |
return jsonify({"error": "No query provided"}), 400
|
| 427 |
|
|
|
|
|
|
|
|
|
|
| 428 |
try:
|
| 429 |
response_data = get_rag_response(query, k)
|
| 430 |
return jsonify(response_data)
|
| 431 |
except Exception as e:
|
| 432 |
return jsonify({"error": str(e)}), 500
|
| 433 |
|
| 434 |
-
@
|
| 435 |
def handle_upload():
|
| 436 |
if 'files' not in request.files:
|
| 437 |
return jsonify({"error": "No files provided"}), 400
|
|
@@ -476,88 +487,15 @@ def handle_upload():
|
|
| 476 |
"failed_files": failed_files
|
| 477 |
})
|
| 478 |
|
| 479 |
-
@
|
| 480 |
def health_check():
|
| 481 |
return jsonify({"status": "ok", "message": "API is running"})
|
| 482 |
|
| 483 |
-
# -------------------------------
|
| 484 |
-
# Gradio UI
|
| 485 |
-
# -------------------------------
|
| 486 |
-
def gradio_chat_response(message, history):
    """Answer a chat message and append a Markdown list of the sources used.

    Args:
        message: The user's question; forwarded to ``get_rag_response``
            with ``k=3``.
        history: Second argument supplied by the caller; not used here.

    Returns:
        The answer text followed by a "Sources" section containing one
        bullet per source with its name and its score to four decimals.
    """
    result = get_rag_response(message, k=3)
    reply = result['answer']
    cited = result['sources']

    # Build every bullet first, then join once instead of growing a string.
    bullets = "".join(
        f"* {entry['source']} (Score: {entry['score']:.4f})\n"
        for entry in cited
    )

    return reply + ("\n\n---\n**📚 Sources:**\n" + bullets)
|
| 496 |
-
|
| 497 |
-
def gradio_upload(file_list):
    """Process uploaded files and add the resulting documents to the store.

    Args:
        file_list: Uploaded file objects, each exposing a ``name`` attribute
            that holds its path. May be empty or ``None``.

    Returns:
        A status string: a "no files" notice, an error message if adding the
        documents to the store raises, or a processed/failed summary.
    """
    if not file_list:
        return "No files uploaded."

    accepted = []
    failure_count = 0

    for upload in file_list:
        path_on_disk = upload.name
        display_name = os.path.basename(path_on_disk)

        try:
            parsed = process_single_file(path_on_disk, display_name)
            if parsed:
                accepted.append(parsed)
            else:
                # A falsy result from the processor counts as a failure.
                failure_count += 1
        except Exception as exc:
            # One bad file is logged and counted; the batch keeps going.
            print(f"❌ Error: {exc}")
            failure_count += 1

    if accepted:
        try:
            add_documents_to_qdrant(accepted)
        except Exception as exc:
            return f"❌ Error: {exc}"

    # Every accepted document was counted as processed exactly once.
    return f"✅ Processed {len(accepted)} files. Failed: {failure_count}."
|
| 527 |
-
|
| 528 |
-
# Two-tab Gradio front end: a chat tab driven by gradio_chat_response and an
# upload tab driven by gradio_upload.
with gr.Blocks(theme="soft") as gradio_ui:
    gr.Markdown("# 🧠 Multimodal RAG System")

    with gr.Tabs():
        # --- Chat tab ---
        with gr.TabItem("💬 Chat"):
            gr.ChatInterface(
                fn=gradio_chat_response,
                title="Chat with Documents",
                description="Ask questions about your documents",
                examples=[
                    "What documents contain bar charts?",
                    "Summarize the environmental report",
                    "What are the key findings?",
                ],
            )

        # --- Upload tab ---
        with gr.TabItem("📤 Upload"):
            gr.Markdown("Upload new documents to the knowledge base")
            file_uploader = gr.File(
                label="Upload Documents",
                file_count="multiple",
                file_types=["image", ".pdf", ".txt", ".md"],
            )
            upload_btn = gr.Button("Process Documents", variant="primary")
            status = gr.Markdown("Ready to upload.")

            # Clicking the button runs gradio_upload on the selected files and
            # writes its returned status string into the Markdown component.
            upload_btn.click(
                fn=gradio_upload,
                inputs=[file_uploader],
                outputs=[status],
            )
|
| 555 |
-
|
| 556 |
# -------------------------------
|
| 557 |
# Initialize and Run
|
| 558 |
# -------------------------------
|
| 559 |
if __name__ == "__main__":
|
| 560 |
-
print("🚀 Starting Multimodal RAG
|
| 561 |
|
| 562 |
# Build initial database if data folder exists
|
| 563 |
folder = "data"
|
|
@@ -565,19 +503,11 @@ if __name__ == "__main__":
|
|
| 565 |
print(f"\n📂 Found '{folder}' folder, building database...")
|
| 566 |
build_or_update_qdrant_store(folder)
|
| 567 |
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
gradio_ui.launch(prevent_thread_lock=True, show_error=True),
|
| 575 |
-
{'/api': flask_app}
|
| 576 |
-
)
|
| 577 |
-
|
| 578 |
-
print("\n✅ Server starting on http://0.0.0.0:7860")
|
| 579 |
-
print(" - Gradio UI: http://0.0.0.0:7860")
|
| 580 |
-
print(" - Flask API: http://0.0.0.0:7860/api/query")
|
| 581 |
-
print(" - Health Check: http://0.0.0.0:7860/api/health")
|
| 582 |
|
| 583 |
-
|
|
|
|
|
|
|
| 1 |
from doctr.models import ocr_predictor
|
| 2 |
from PIL import Image
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 6 |
from langchain_core.documents import Document
|
| 7 |
import os
|
| 8 |
from groq import Groq
|
|
|
|
| 301 |
retrieved_docs.append({
|
| 302 |
"source": doc.metadata['source'],
|
| 303 |
"content": doc.page_content,
|
| 304 |
+
"score": float(score),
|
| 305 |
"metadata": doc.metadata
|
| 306 |
})
|
| 307 |
return retrieved_docs
|
|
|
|
| 411 |
raise
|
| 412 |
|
| 413 |
# -------------------------------
|
| 414 |
+
# Flask App - API ONLY
|
| 415 |
# -------------------------------
|
| 416 |
+
app = Flask(__name__)
|
| 417 |
|
| 418 |
+
@app.route('/', methods=['GET'])
def home():
    """Return a JSON index naming the API and listing its endpoints."""
    endpoint_index = {
        "/query": "POST - Query documents",
        "/upload": "POST - Upload files",
        "/health": "GET - Health check",
    }
    overview = {
        "message": "🧠 Multimodal RAG API",
        "endpoints": endpoint_index,
    }
    return jsonify(overview)
|
| 428 |
+
|
| 429 |
+
@app.route('/query', methods=['POST'])
def handle_query():
    """Answer a RAG query posted as a JSON object.

    Expected request body fields:
        query: Non-empty string to search for (required).
        k: Number of documents to retrieve (optional, default 3). Must be
            convertible to a positive integer.

    Returns:
        The JSON-encoded result of ``get_rag_response(query, k)`` on success;
        a JSON error with status 400 when the body or its fields are invalid;
        a JSON error with status 500 when ``get_rag_response`` raises.
    """
    # silent=True returns None for a missing, non-JSON, or malformed body
    # instead of aborting, so every failure uses the same JSON error shape.
    data = request.get_json(silent=True)

    # A JSON array or scalar has no .get(); only a JSON object is acceptable.
    if not isinstance(data, dict):
        return jsonify({"error": "No query provided"}), 400

    query = data.get('query', '')
    # Reject empty or whitespace-only queries rather than running a search.
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query provided"}), 400

    try:
        k = int(data.get('k', 3))
    except (TypeError, ValueError):
        return jsonify({"error": "'k' must be a positive integer"}), 400
    if k < 1:
        return jsonify({"error": "'k' must be a positive integer"}), 400

    try:
        response_data = get_rag_response(query, k)
        return jsonify(response_data)
    except Exception as e:
        # Endpoint boundary: report the failure as JSON instead of crashing.
        return jsonify({"error": str(e)}), 500
|
| 444 |
|
| 445 |
+
@app.route('/upload', methods=['POST'])
|
| 446 |
def handle_upload():
|
| 447 |
if 'files' not in request.files:
|
| 448 |
return jsonify({"error": "No files provided"}), 400
|
|
|
|
| 487 |
"failed_files": failed_files
|
| 488 |
})
|
| 489 |
|
| 490 |
+
@app.route('/health', methods=['GET'])
def health_check():
    """Return a fixed JSON payload indicating the API is running."""
    payload = {"status": "ok", "message": "API is running"}
    return jsonify(payload)
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
# -------------------------------
|
| 495 |
# Initialize and Run
|
| 496 |
# -------------------------------
|
| 497 |
if __name__ == "__main__":
|
| 498 |
+
print("🚀 Starting Multimodal RAG API...")
|
| 499 |
|
| 500 |
# Build initial database if data folder exists
|
| 501 |
folder = "data"
|
|
|
|
| 503 |
print(f"\n📂 Found '{folder}' folder, building database...")
|
| 504 |
build_or_update_qdrant_store(folder)
|
| 505 |
|
| 506 |
+
print("\n✅ Flask API starting on http://0.0.0.0:7860")
|
| 507 |
+
print(" Endpoints:")
|
| 508 |
+
print(" - GET / (Home/Docs)")
|
| 509 |
+
print(" - POST /query")
|
| 510 |
+
print(" - POST /upload")
|
| 511 |
+
print(" - GET /health\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
|
| 513 |
+
app.run(host='0.0.0.0', port=7860, debug=False)
|