Spaces:
Running
Running
File size: 5,062 Bytes
5aa2260 4088106 5aa2260 4088106 f9b41cf 5aa2260 f56271e 5aa2260 f9b41cf 5aa2260 f9b41cf fed2a4b f9b41cf 5aa2260 fed2a4b 5aa2260 fed2a4b f9b41cf fed2a4b caf0d13 f9b41cf 5aa2260 4088106 fed2a4b 5aa2260 fed2a4b 4088106 fed2a4b 4088106 5aa2260 f9b41cf 5aa2260 4088106 f9b41cf 5aa2260 4088106 5aa2260 4088106 5aa2260 fed2a4b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | import os, uuid, threading
from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
load_dotenv()
from rag.vector_store import HybridVectorStore
from rag.ingestor import PDFIngestor, URLIngestor, MAX_PDF_BYTES
from graph.research_graph import ResearchGraph
from tracing.tracer import Tracer
from agents.llm_factory import AVAILABLE_MODELS, set_model, get_current_model
# Flask application object; every route below is registered on it.
app = Flask(__name__)
# Secret key: taken from the SECRET_KEY environment variable when set,
# otherwise a fresh random value generated at import time (so it differs
# on every process start).
app.secret_key = os.getenv("SECRET_KEY", os.urandom(24).hex())
# Directory where uploaded PDFs are written before ingestion; created at
# import time if it does not exist yet.
UPLOAD_FOLDER = "/tmp/docmind_uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Module-level singletons shared by all request handlers in this file.
vector_store = HybridVectorStore()
tracer = Tracer()
graph = ResearchGraph(vector_store, tracer)
# In-memory registry of research jobs keyed by query id. Each value is a
# {"status": ..., "result": ...} dict written by the /api/research handler
# and read by /api/trace/<qid>. Nothing in this file removes entries.
queries: dict = {}
def _clear_uploads() -> None:
    """Best-effort removal of every entry in ``UPLOAD_FOLDER``.

    The folder is (re)created first, so a directory that vanished after
    start-up (for example through a ``/tmp`` cleanup) no longer makes the
    listing raise and callers can rely on the folder existing afterwards.
    Entries that cannot be removed are logged and skipped; this function
    never raises for a single undeletable entry.
    """
    import logging  # function-scope import keeps the file's import block untouched

    log = logging.getLogger(__name__)
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    for name in os.listdir(UPLOAD_FOLDER):
        path = os.path.join(UPLOAD_FOLDER, name)
        try:
            os.remove(path)
        except OSError as exc:
            # Still best-effort, but leave a trace instead of hiding the failure.
            log.warning("Could not remove upload %s: %s", path, exc)
# ── Routes ────────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/health")
def health():
    """Return a JSON status report.

    The payload carries a fixed ``"ok"`` status, the vector store's document
    and chunk counts and source label, the label of the current model, and
    whether the ``HF_TOKEN`` environment variable is set to a non-empty value.
    """
    report = dict(
        status="ok",
        docs_indexed=vector_store.doc_count,
        chunks_stored=vector_store.chunk_count,
        source=vector_store.source_label,
        model=get_current_model()["label"],
        token_set=bool(os.getenv("HF_TOKEN")),
    )
    return jsonify(report)
@app.route("/api/models")
def api_models():
    """Return the available models and the key of the currently selected one."""
    active_key = get_current_model()["key"]
    body = {"models": AVAILABLE_MODELS, "current": active_key}
    return jsonify(body)
@app.route("/api/set_model", methods=["POST"])
def api_set_model():
    """Switch the active LLM model server-side.

    Expects a JSON object body with a ``"model"`` key naming an entry of
    ``AVAILABLE_MODELS``.

    Returns:
        ``{"success": True, "model": <key>, "label": <label>}`` on success,
        or ``{"error": ...}`` with status 400 when the key is missing,
        not a string, or unknown.
    """
    # silent=True turns a missing, malformed or non-JSON body into None so
    # the client gets this API's JSON error instead of a framework error.
    data = request.get_json(silent=True)
    # Guard against bodies that are valid JSON but not an object (list, str, ...)
    # and against non-string "model" values, both of which used to raise
    # AttributeError and surface as a 500.
    raw_key = data.get("model") if isinstance(data, dict) else None
    key = raw_key.strip() if isinstance(raw_key, str) else ""
    if key not in AVAILABLE_MODELS:
        return jsonify({"error": f"Unknown model key '{key}'."}), 400
    set_model(key)
    m = get_current_model()
    return jsonify({"success": True, "model": key, "label": m["label"]})
@app.route("/api/upload", methods=["POST"])
def upload():
    """Replace the indexed source with an uploaded PDF.

    Reads the multipart field ``"file"``, rejects non-``.pdf`` names and
    files larger than ``MAX_PDF_BYTES``, stores the file under
    ``UPLOAD_FOLDER``, ingests it, and swaps the vector store contents for
    the new chunks.

    Returns:
        ``{"success": True, "filename": ..., "chunks": <count>}`` on success;
        ``{"error": ...}`` with 400 for validation failures or 500 when
        saving or ingesting the file raises.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file attached."}), 400
    f = request.files["file"]
    # Treat a missing filename like an empty one instead of crashing on .lower().
    filename = f.filename or ""
    if not filename.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files are supported."}), 400
    # Measure the upload by seeking to the end, then rewind so save() writes
    # the whole stream.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0)
    if size > MAX_PDF_BYTES:
        # NOTE(review): the message hard-codes "10 MB"; confirm it matches MAX_PDF_BYTES.
        return jsonify({"error": f"File exceeds 10 MB limit ({size/1024/1024:.1f} MB)."}), 400
    # secure_filename() strips leading dots and non-ASCII characters, so names
    # such as ".pdf" collapse to a bare "pdf". Fall back to a generated name
    # so the stored file always keeps its .pdf extension.
    safe_name = secure_filename(filename)
    if not safe_name.lower().endswith(".pdf"):
        safe_name = f"{uuid.uuid4().hex}.pdf"
    _clear_uploads()
    path = os.path.join(UPLOAD_FOLDER, safe_name)
    try:
        # Saving is inside the try so disk errors are reported through the
        # same JSON error path as ingestion failures.
        f.save(path)
        chunks = PDFIngestor().ingest(path)
        vector_store.clear()
        vector_store.add_documents(chunks, source_label=filename)
        return jsonify({"success": True, "filename": filename, "chunks": len(chunks)})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
@app.route("/api/ingest_url", methods=["POST"])
def ingest_url():
    """Replace the indexed source with the content fetched from a URL.

    Expects a JSON object body with a ``"url"`` key. A URL without an
    ``http://`` or ``https://`` prefix gets ``https://`` prepended.

    Returns:
        ``{"success": True, "url": ..., "chunks": <count>}`` on success;
        ``{"error": ...}`` with 400 when the URL is missing or not a string,
        or 500 when ingestion raises.
    """
    # silent=True turns a missing, malformed or non-JSON body into None so
    # the client gets this API's JSON error instead of a framework error.
    data = request.get_json(silent=True)
    # Non-object bodies and non-string "url" values are treated as missing
    # instead of raising AttributeError.
    raw_url = data.get("url") if isinstance(data, dict) else None
    url = raw_url.strip() if isinstance(raw_url, str) else ""
    if not url:
        return jsonify({"error": "URL is required."}), 400
    # URL schemes are case-insensitive: compare in lower case so an input
    # like "HTTP://example.com" is not turned into "https://HTTP://example.com".
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    # NOTE(review): the URL is caller-controlled and fetched server-side;
    # confirm URLIngestor refuses internal/private addresses (SSRF).
    try:
        chunks = URLIngestor().ingest(url)
        vector_store.clear()
        _clear_uploads()
        vector_store.add_documents(chunks, source_label=url)
        return jsonify({"success": True, "url": url, "chunks": len(chunks)})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
@app.route("/api/research", methods=["POST"])
def research():
    """Start a research run for a question and return its query id.

    Expects a JSON object body with a ``"question"`` key and an optional
    ``"model"`` key. When ``model`` names an entry of ``AVAILABLE_MODELS``
    it becomes the active model before the run starts. The run itself
    executes ``graph.run(question, qid)`` on a daemon thread; its status and
    result are stored in ``queries[qid]`` and can be read through
    ``/api/trace/<qid>``.

    Returns:
        ``{"query_id": <uuid4 string>}`` once the thread has been started, or
        ``{"error": ...}`` with 400 when the question is missing or no source
        is loaded.
    """
    # silent=True turns a missing, malformed or non-JSON body into None so
    # the client gets this API's JSON error instead of a framework error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_question = data.get("question")
    raw_model = data.get("model")
    # Non-string values are treated as absent instead of raising AttributeError.
    question = raw_question.strip() if isinstance(raw_question, str) else ""
    model = raw_model.strip() if isinstance(raw_model, str) else ""
    if not question:
        return jsonify({"error": "Question is required."}), 400
    if vector_store.doc_count == 0:
        # The em dash replaces a mis-decoded character in the original message.
        return jsonify({"error": "No source loaded — please upload a PDF or fetch a URL first."}), 400
    if model and model in AVAILABLE_MODELS:
        set_model(model)
    qid = str(uuid.uuid4())
    record = {"status": "running", "result": None}
    queries[qid] = record

    def _run():
        """Execute the graph and store the outcome on this query's record."""
        try:
            result = graph.run(question, qid)
        except Exception as exc:
            record.update({"status": "error", "result": {"error": str(exc)}})
        else:
            record.update({"status": "complete", "result": result})

    threading.Thread(target=_run, daemon=True).start()
    return jsonify({"query_id": qid})
@app.route("/api/trace/<qid>")
def get_trace(qid):
    """Return status, trace and result for a query id, or a 404 JSON error.

    A query id with no (or an empty) entry in ``queries`` yields
    ``{"error": "Query not found."}`` with status 404.
    """
    entry = queries.get(qid)
    if entry:
        snapshot = {
            "status": entry["status"],
            "trace": tracer.get(qid),
            "result": entry["result"],
        }
        return jsonify(snapshot)
    return jsonify({"error": "Query not found."}), 404
if __name__ == "__main__":
    # Direct-execution entry point: listen on all interfaces, port 7860,
    # with Flask's debug mode off.
    server_options = {"host": "0.0.0.0", "port": 7860, "debug": False}
    app.run(**server_options)
|