Spaces:
Sleeping
Sleeping
| import os, uuid, threading | |
| from flask import Flask, render_template, request, jsonify | |
| from werkzeug.utils import secure_filename | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from rag.vector_store import HybridVectorStore | |
| from rag.ingestor import PDFIngestor, URLIngestor, MAX_PDF_BYTES | |
| from graph.research_graph import ResearchGraph | |
| from tracing.tracer import Tracer | |
| from agents.llm_factory import AVAILABLE_MODELS, set_model, get_current_model | |
| app = Flask(__name__) | |
| app.secret_key = os.getenv("SECRET_KEY", os.urandom(24).hex()) | |
| UPLOAD_FOLDER = "/tmp/docmind_uploads" | |
| os.makedirs(UPLOAD_FOLDER, exist_ok=True) | |
| vector_store = HybridVectorStore() | |
| tracer = Tracer() | |
| graph = ResearchGraph(vector_store, tracer) | |
| queries: dict = {} | |
| def _clear_uploads() -> None: | |
| for f in os.listdir(UPLOAD_FOLDER): | |
| try: | |
| os.remove(os.path.join(UPLOAD_FOLDER, f)) | |
| except Exception: | |
| pass | |
| # ββ Routes ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def index(): | |
| return render_template("index.html") | |
| def health(): | |
| return jsonify({ | |
| "status": "ok", | |
| "docs_indexed": vector_store.doc_count, | |
| "chunks_stored": vector_store.chunk_count, | |
| "source": vector_store.source_label, | |
| "model": get_current_model()["label"], | |
| "token_set": bool(os.getenv("HF_TOKEN")), | |
| }) | |
| def api_models(): | |
| """Return available models and the currently selected one.""" | |
| current = get_current_model() | |
| return jsonify({ | |
| "models": AVAILABLE_MODELS, | |
| "current": current["key"], | |
| }) | |
| def api_set_model(): | |
| """Switch the active LLM model server-side.""" | |
| data = request.json or {} | |
| key = (data.get("model") or "").strip() | |
| if key not in AVAILABLE_MODELS: | |
| return jsonify({"error": f"Unknown model key '{key}'."}), 400 | |
| set_model(key) | |
| m = get_current_model() | |
| return jsonify({"success": True, "model": key, "label": m["label"]}) | |
| def upload(): | |
| if "file" not in request.files: | |
| return jsonify({"error": "No file attached."}), 400 | |
| f = request.files["file"] | |
| if not f.filename.lower().endswith(".pdf"): | |
| return jsonify({"error": "Only PDF files are supported."}), 400 | |
| f.seek(0, 2); size = f.tell(); f.seek(0) | |
| if size > MAX_PDF_BYTES: | |
| return jsonify({"error": f"File exceeds 10 MB limit ({size/1024/1024:.1f} MB)."}), 400 | |
| _clear_uploads() | |
| path = os.path.join(UPLOAD_FOLDER, secure_filename(f.filename)) | |
| f.save(path) | |
| try: | |
| chunks = PDFIngestor().ingest(path) | |
| vector_store.clear() | |
| vector_store.add_documents(chunks, source_label=f.filename) | |
| return jsonify({"success": True, "filename": f.filename, "chunks": len(chunks)}) | |
| except Exception as exc: | |
| return jsonify({"error": str(exc)}), 500 | |
| def ingest_url(): | |
| data = request.json or {} | |
| url = (data.get("url") or "").strip() | |
| if not url: | |
| return jsonify({"error": "URL is required."}), 400 | |
| if not url.startswith(("http://", "https://")): | |
| url = "https://" + url | |
| try: | |
| chunks = URLIngestor().ingest(url) | |
| vector_store.clear() | |
| _clear_uploads() | |
| vector_store.add_documents(chunks, source_label=url) | |
| return jsonify({"success": True, "url": url, "chunks": len(chunks)}) | |
| except Exception as exc: | |
| return jsonify({"error": str(exc)}), 500 | |
| def research(): | |
| data = request.json or {} | |
| question = (data.get("question") or "").strip() | |
| model = (data.get("model") or "").strip() | |
| if not question: | |
| return jsonify({"error": "Question is required."}), 400 | |
| if vector_store.doc_count == 0: | |
| return jsonify({"error": "No source loaded β please upload a PDF or fetch a URL first."}), 400 | |
| if model and model in AVAILABLE_MODELS: | |
| set_model(model) | |
| qid = str(uuid.uuid4()) | |
| queries[qid] = {"status": "running", "result": None} | |
| def _run(): | |
| try: | |
| result = graph.run(question, qid) | |
| queries[qid].update({"status": "complete", "result": result}) | |
| except Exception as exc: | |
| queries[qid].update({"status": "error", "result": {"error": str(exc)}}) | |
| threading.Thread(target=_run, daemon=True).start() | |
| return jsonify({"query_id": qid}) | |
| def get_trace(qid): | |
| q = queries.get(qid) | |
| if not q: | |
| return jsonify({"error": "Query not found."}), 404 | |
| return jsonify({"status": q["status"], "trace": tracer.get(qid), "result": q["result"]}) | |
| if __name__ == "__main__": | |
| app.run(host="0.0.0.0", port=7860, debug=False) | |