# Spaces: mnoorchenar/docmind — Running — File size: 5,062 Bytes

import os, uuid, threading
from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from dotenv import load_dotenv

load_dotenv()

from rag.vector_store     import HybridVectorStore
from rag.ingestor         import PDFIngestor, URLIngestor, MAX_PDF_BYTES
from graph.research_graph import ResearchGraph
from tracing.tracer       import Tracer
from agents.llm_factory   import AVAILABLE_MODELS, set_model, get_current_model

# Flask application object that every route in this module is registered on.
app = Flask(__name__)
# Use SECRET_KEY from the environment when it is set; otherwise fall back to
# a random key generated for this process.
app.secret_key = os.getenv("SECRET_KEY", os.urandom(24).hex())

# Directory uploaded PDFs are written to before ingestion; created at import
# time if it does not already exist.
UPLOAD_FOLDER = "/tmp/docmind_uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Module-level singletons shared by all request handlers. The graph is built
# from the store and tracer, so those two must be created first.
vector_store = HybridVectorStore()
tracer       = Tracer()
graph        = ResearchGraph(vector_store, tracer)
# Research jobs keyed by query id; each value is a dict with "status" and
# "result" entries.
queries: dict = {}


def _clear_uploads() -> None:
    """Delete every file in ``UPLOAD_FOLDER``, creating the folder if missing.

    Cleanup is best-effort: an entry that cannot be removed (for example a
    sub-directory, or a file that disappeared concurrently) is skipped so the
    calling request is not failed by a stale upload.
    """
    # The folder may have been removed since start-up (it lives under /tmp);
    # recreate it so the listing below, and the caller's later save, succeed.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    for name in os.listdir(UPLOAD_FOLDER):
        try:
            os.remove(os.path.join(UPLOAD_FOLDER, name))
        except OSError:
            # Only filesystem errors are expected here; anything else is a
            # programming error and is allowed to surface.
            continue


# ── Routes ────────────────────────────────────────────────────────────────────

@app.route("/")
def index():
    """Serve the page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page


@app.route("/health")
def health():
    """Report liveness plus a snapshot of the loaded source and active model.

    The JSON payload carries a fixed ``status`` of ``"ok"``, the vector
    store's document count, chunk count and source label, the label of the
    current model, and whether ``HF_TOKEN`` is set to a non-empty value.
    """
    payload = {"status": "ok"}
    payload["docs_indexed"] = vector_store.doc_count
    payload["chunks_stored"] = vector_store.chunk_count
    payload["source"] = vector_store.source_label
    payload["model"] = get_current_model()["label"]
    payload["token_set"] = True if os.getenv("HF_TOKEN") else False
    return jsonify(payload)


@app.route("/api/models")
def api_models():
    """List the selectable models together with the key of the active one."""
    active_key = get_current_model()["key"]
    return jsonify(models=AVAILABLE_MODELS, current=active_key)


@app.route("/api/set_model", methods=["POST"])
def api_set_model():
    """Switch the active LLM model server-side.

    Expects a JSON object of the form ``{"model": <key>}`` where ``<key>``
    is a member of ``AVAILABLE_MODELS``.

    Returns:
        A JSON object with ``success``, ``model`` and ``label`` on success,
        or a 400 response with an ``error`` message when the key is missing,
        is not a string, or is not a known model key.
    """
    data = request.json or {}
    # A valid JSON body is not necessarily an object (it may be a list or a
    # bare number); treat anything else as empty instead of failing on .get().
    if not isinstance(data, dict):
        data = {}
    raw_key = data.get("model")
    # Non-string values have no .strip() and may be unhashable, so they are
    # rejected through the same "unknown key" path.
    key = raw_key.strip() if isinstance(raw_key, str) else ""
    if key not in AVAILABLE_MODELS:
        return jsonify({"error": f"Unknown model key '{key}'."}), 400
    set_model(key)
    m = get_current_model()
    return jsonify({"success": True, "model": key, "label": m["label"]})


@app.route("/api/upload", methods=["POST"])
def upload():
    """Accept a PDF upload, ingest it, and make it the only loaded source.

    The request must carry a multipart ``file`` part whose filename ends in
    ``.pdf`` and whose size does not exceed ``MAX_PDF_BYTES``.  Previously
    uploaded files are removed, the new file is saved under
    ``UPLOAD_FOLDER``, and the vector store is cleared and refilled with the
    chunks produced by ``PDFIngestor``.

    Returns:
        A JSON object with ``success``, ``filename`` and ``chunks`` on
        success; a 400 response with an ``error`` message for a missing,
        non-PDF or oversized file; a 500 response with the exception text
        when saving or ingestion fails.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file attached."}), 400
    f = request.files["file"]
    # Guard against a part that carries no filename at all.
    filename = f.filename or ""
    if not filename.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files are supported."}), 400

    # Measure the upload by seeking to the end of its stream, then rewind so
    # the later save() writes the file from the beginning.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0)
    if size > MAX_PDF_BYTES:
        # Derive the advertised limit from the constant so the message
        # cannot drift from the value actually enforced.
        limit_mb = MAX_PDF_BYTES / 1024 / 1024
        return jsonify({"error": f"File exceeds {limit_mb:g} MB limit ({size/1024/1024:.1f} MB)."}), 400

    _clear_uploads()
    path = os.path.join(UPLOAD_FOLDER, secure_filename(filename))
    try:
        # Saving is inside the try so a filesystem failure yields the same
        # JSON error shape as an ingestion failure.
        f.save(path)
        chunks = PDFIngestor().ingest(path)
        # The store is only cleared once ingestion has succeeded.
        vector_store.clear()
        vector_store.add_documents(chunks, source_label=filename)
        return jsonify({"success": True, "filename": filename, "chunks": len(chunks)})
    except Exception as exc:
        app.logger.exception("Failed to ingest uploaded PDF %r", filename)
        return jsonify({"error": str(exc)}), 500


@app.route("/api/ingest_url", methods=["POST"])
def ingest_url():
    """Fetch a URL, ingest its content, and make it the only loaded source.

    Expects a JSON object of the form ``{"url": <string>}``.  A value
    without an ``http://`` or ``https://`` prefix gets ``https://``
    prepended.  On success the vector store and the upload folder are
    cleared and the store is refilled with the chunks produced by
    ``URLIngestor``.

    Returns:
        A JSON object with ``success``, ``url`` and ``chunks`` on success; a
        400 response with an ``error`` message when no usable URL is given;
        a 500 response with the exception text when ingestion fails.
    """
    data = request.json or {}
    # A valid JSON body is not necessarily an object; treat anything else as
    # empty instead of failing on .get().
    if not isinstance(data, dict):
        data = {}
    raw_url = data.get("url")
    # Non-string values are handled like a missing URL rather than crashing
    # on .strip().
    url = raw_url.strip() if isinstance(raw_url, str) else ""
    if not url:
        return jsonify({"error": "URL is required."}), 400
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    # NOTE(security): the server fetches whatever URL the client supplies.
    # If this endpoint is reachable by untrusted users, confirm that
    # URLIngestor restricts internal/private targets (SSRF).
    try:
        chunks = URLIngestor().ingest(url)
        # State is only replaced once the fetch has succeeded.
        vector_store.clear()
        _clear_uploads()
        vector_store.add_documents(chunks, source_label=url)
        return jsonify({"success": True, "url": url, "chunks": len(chunks)})
    except Exception as exc:
        app.logger.exception("Failed to ingest URL %r", url)
        return jsonify({"error": str(exc)}), 500


@app.route("/api/research", methods=["POST"])
def research():
    """Start a research run for a question and return its query id.

    Expects a JSON object with a ``question`` string and an optional
    ``model`` key.  When ``model`` is a member of ``AVAILABLE_MODELS`` it
    becomes the active model before the run starts; any other value is
    ignored.  The run executes on a daemon thread and records its outcome in
    ``queries`` under the returned id.

    Returns:
        ``{"query_id": <id>}`` once the run has been started, or a 400
        response with an ``error`` message when the question is empty or no
        source has been loaded.
    """
    data = request.json or {}
    # A valid JSON body is not necessarily an object; treat anything else as
    # empty instead of failing on .get().
    if not isinstance(data, dict):
        data = {}
    raw_question = data.get("question")
    raw_model = data.get("model")
    # Non-string values are treated as absent rather than crashing on
    # .strip() (or on the membership test, for unhashable values).
    question = raw_question.strip() if isinstance(raw_question, str) else ""
    model = raw_model.strip() if isinstance(raw_model, str) else ""

    if not question:
        return jsonify({"error": "Question is required."}), 400
    if vector_store.doc_count == 0:
        return jsonify({"error": "No source loaded — please upload a PDF or fetch a URL first."}), 400
    if model and model in AVAILABLE_MODELS:
        set_model(model)

    qid = str(uuid.uuid4())
    # NOTE(review): nothing in the code visible here removes entries from
    # ``queries``, so it grows with every request — consider eviction.
    queries[qid] = {"status": "running", "result": None}

    def _run():
        """Execute the graph and store either its result or the error text."""
        try:
            result = graph.run(question, qid)
            queries[qid].update({"status": "complete", "result": result})
        except Exception as exc:
            # Log the traceback; the client only ever sees str(exc).
            app.logger.exception("Research run %s failed", qid)
            queries[qid].update({"status": "error", "result": {"error": str(exc)}})

    threading.Thread(target=_run, daemon=True).start()
    return jsonify({"query_id": qid})


@app.route("/api/trace/<qid>")
def get_trace(qid):
    """Return the status, trace and result recorded for query ``qid``.

    Responds with 404 and an ``error`` message when no entry exists for the
    given id.
    """
    entry = queries.get(qid)
    if entry:
        return jsonify(
            status=entry["status"],
            trace=tracer.get(qid),
            result=entry["result"],
        )
    return jsonify({"error": "Query not found."}), 404


if __name__ == "__main__":
    # Direct execution only: listen on every interface, port 7860, with the
    # debugger disabled.
    app.run(debug=False, port=7860, host="0.0.0.0")