File size: 5,062 Bytes
5aa2260
 
 
 
 
 
 
4088106
 
5aa2260
4088106
f9b41cf
5aa2260
f56271e
5aa2260
 
 
 
 
 
 
 
f9b41cf
5aa2260
 
f9b41cf
fed2a4b
 
 
 
 
 
 
f9b41cf
 
5aa2260
 
 
 
 
 
 
 
fed2a4b
 
5aa2260
fed2a4b
f9b41cf
fed2a4b
caf0d13
 
 
f9b41cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5aa2260
 
 
 
 
 
 
4088106
fed2a4b
 
 
5aa2260
 
 
 
fed2a4b
 
4088106
fed2a4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4088106
5aa2260
 
 
 
 
 
 
 
f9b41cf
 
5aa2260
 
 
4088106
f9b41cf
 
 
 
5aa2260
 
 
 
 
4088106
5aa2260
4088106
5aa2260
 
 
 
 
 
 
 
 
 
 
 
 
 
fed2a4b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os, uuid, threading
from flask import Flask, render_template, request, jsonify
from werkzeug.utils import secure_filename
from dotenv import load_dotenv

load_dotenv()

from rag.vector_store     import HybridVectorStore
from rag.ingestor         import PDFIngestor, URLIngestor, MAX_PDF_BYTES
from graph.research_graph import ResearchGraph
from tracing.tracer       import Tracer
from agents.llm_factory   import AVAILABLE_MODELS, set_model, get_current_model

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)
# Secret key: read from the SECRET_KEY environment variable when it is set,
# otherwise a random hex string generated at import time (so it differs on
# every process start).
app.secret_key = os.getenv("SECRET_KEY", os.urandom(24).hex())

# Directory where uploaded PDFs are written; created at import time if missing.
UPLOAD_FOLDER = "/tmp/docmind_uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Module-level instances shared by every request handler in this module.
vector_store = HybridVectorStore()
tracer       = Tracer()
graph        = ResearchGraph(vector_store, tracer)
# Research jobs keyed by query id. Each value is a dict with "status" and
# "result" keys, written by the /api/research handler and read by
# /api/trace/<qid>. Nothing in this module removes entries.
queries: dict = {}


def _clear_uploads() -> None:
    """Delete the files in ``UPLOAD_FOLDER`` on a best-effort basis.

    The folder is recreated first if it has gone missing, so a directory
    removed after start-up does not make this call (or the save that
    usually follows it) fail. Entries that cannot be removed, such as
    sub-directories or files with restrictive permissions, are skipped.
    """
    try:
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)
        names = os.listdir(UPLOAD_FOLDER)
    except OSError:
        # Cleanup is best effort; an unreadable folder is not fatal here.
        return
    for name in names:
        try:
            os.remove(os.path.join(UPLOAD_FOLDER, name))
        except OSError:
            # Only filesystem errors are ignored; anything else is a real
            # bug and is allowed to propagate.
            continue


# ── Routes ────────────────────────────────────────────────────────────────────

@app.route("/")
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template("index.html")
    return page


@app.route("/health")
def health():
    """Return a JSON status report covering the store, model and HF token."""
    report = {"status": "ok"}
    # Snapshot of what the vector store currently holds.
    report["docs_indexed"] = vector_store.doc_count
    report["chunks_stored"] = vector_store.chunk_count
    report["source"] = vector_store.source_label
    # Label of the currently selected model.
    report["model"] = get_current_model()["label"]
    # Only whether HF_TOKEN is set is exposed, never its value.
    report["token_set"] = bool(os.getenv("HF_TOKEN"))
    return jsonify(report)


@app.route("/api/models")
def api_models():
    """List the available models together with the key of the active one."""
    active_key = get_current_model()["key"]
    listing = {"models": AVAILABLE_MODELS, "current": active_key}
    return jsonify(listing)


@app.route("/api/set_model", methods=["POST"])
def api_set_model():
    """Switch the active LLM model server-side.

    Expects a JSON object body whose ``"model"`` value is a member of
    ``AVAILABLE_MODELS``.

    Returns:
        ``{"success": True, "model": <key>, "label": <label>}`` on success,
        or ``({"error": ...}, 400)`` when the key is missing, not a string,
        or unknown.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get this endpoint's own JSON 400 response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_key = data.get("model")
    # A non-string value (number, list, ...) is treated like a missing key.
    key = raw_key.strip() if isinstance(raw_key, str) else ""
    if key not in AVAILABLE_MODELS:
        return jsonify({"error": f"Unknown model key '{key}'."}), 400
    set_model(key)
    m = get_current_model()
    return jsonify({"success": True, "model": key, "label": m["label"]})


@app.route("/api/upload", methods=["POST"])
def upload():
    """Ingest an uploaded PDF and make it the only indexed source.

    Reads the multipart field ``"file"``, rejects anything that is not a
    ``.pdf`` or that is larger than ``MAX_PDF_BYTES``, saves it under
    ``UPLOAD_FOLDER`` (after clearing earlier uploads), ingests it and
    replaces the vector store contents with the resulting chunks.

    Returns:
        ``{"success": True, "filename": ..., "chunks": <count>}`` on success,
        ``({"error": ...}, 400)`` for a rejected request, or
        ``({"error": ...}, 500)`` when saving or ingestion fails.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file attached."}), 400
    f = request.files["file"]
    # The client may omit the filename entirely; treat that as "not a PDF".
    filename = f.filename or ""
    if not filename.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files are supported."}), 400
    # Measure the payload by seeking to its end, then rewind so save() writes
    # the whole stream.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0)
    if size > MAX_PDF_BYTES:
        return jsonify({"error": f"File exceeds 10 MB limit ({size/1024/1024:.1f} MB)."}), 400
    # secure_filename drops unsafe and non-ASCII characters and can strip the
    # name down to nothing or lose the extension; fall back to a fixed name
    # so the save path is always a file path ending in ".pdf".
    safe_name = secure_filename(filename)
    if not safe_name.lower().endswith(".pdf"):
        safe_name = "upload.pdf"
    try:
        _clear_uploads()
        path = os.path.join(UPLOAD_FOLDER, safe_name)
        f.save(path)
        chunks = PDFIngestor().ingest(path)
        # The previous source is discarded only after ingestion succeeded.
        vector_store.clear()
        vector_store.add_documents(chunks, source_label=filename)
        return jsonify({"success": True, "filename": filename, "chunks": len(chunks)})
    except Exception as exc:
        # Request boundary: record the traceback, report the message as JSON.
        app.logger.exception("PDF upload failed")
        return jsonify({"error": str(exc)}), 500


@app.route("/api/ingest_url", methods=["POST"])
def ingest_url():
    """Ingest the page at a client-supplied URL as the only indexed source.

    Expects a JSON object body with a ``"url"`` string. A URL without an
    ``http://`` or ``https://`` prefix gets ``https://`` prepended.

    Returns:
        ``{"success": True, "url": <final url>, "chunks": <count>}`` on
        success, ``({"error": ...}, 400)`` when no URL was supplied, or
        ``({"error": ...}, 500)`` when ingestion fails.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get this endpoint's own JSON 400 response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_url = data.get("url")
    url = raw_url.strip() if isinstance(raw_url, str) else ""
    if not url:
        return jsonify({"error": "URL is required."}), 400
    # URI schemes are case-insensitive, so "HTTP://host" must not be turned
    # into "https://HTTP://host".
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    # NOTE(review): the URL comes straight from the client; confirm that
    # URLIngestor refuses internal/private hosts if this service is exposed.
    try:
        chunks = URLIngestor().ingest(url)
        # The previous source is discarded only after ingestion succeeded.
        vector_store.clear()
        _clear_uploads()
        vector_store.add_documents(chunks, source_label=url)
        return jsonify({"success": True, "url": url, "chunks": len(chunks)})
    except Exception as exc:
        # Request boundary: record the traceback, report the message as JSON.
        app.logger.exception("URL ingestion failed")
        return jsonify({"error": str(exc)}), 500


@app.route("/api/research", methods=["POST"])
def research():
    """Start a research run in a background thread and return its query id.

    Expects a JSON object body with a ``"question"`` string and an optional
    ``"model"`` key. The run's state is stored in ``queries`` under the
    returned id, first as ``"running"`` and later as ``"complete"`` or
    ``"error"``.

    Returns:
        ``{"query_id": <uuid4 string>}`` once the thread has been started, or
        ``({"error": ...}, 400)`` when the question is missing or no source
        has been indexed yet.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get this endpoint's own JSON 400 response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_question = data.get("question")
    raw_model = data.get("model")
    # Non-string values are treated like missing ones.
    question = raw_question.strip() if isinstance(raw_question, str) else ""
    model = raw_model.strip() if isinstance(raw_model, str) else ""

    if not question:
        return jsonify({"error": "Question is required."}), 400
    if vector_store.doc_count == 0:
        return jsonify({"error": "No source loaded — please upload a PDF or fetch a URL first."}), 400
    # An unknown model key is ignored and the current model stays active.
    # NOTE(review): set_model looks like it changes process-wide state, so
    # concurrent requests may affect each other; confirm.
    if model and model in AVAILABLE_MODELS:
        set_model(model)

    qid          = str(uuid.uuid4())
    queries[qid] = {"status": "running", "result": None}

    def _run():
        """Run the graph off the request thread and record the outcome."""
        try:
            result = graph.run(question, qid)
            queries[qid].update({"status": "complete", "result": result})
        except Exception as exc:
            # Thread boundary: keep the traceback in the log and surface
            # the message to clients polling /api/trace/<qid>.
            app.logger.exception("Research query %s failed", qid)
            queries[qid].update({"status": "error", "result": {"error": str(exc)}})

    threading.Thread(target=_run, daemon=True).start()
    return jsonify({"query_id": qid})


@app.route("/api/trace/<qid>")
def get_trace(qid):
    """Return status, trace and result for ``qid``, or a 404 JSON error when
    no entry is recorded for it."""
    entry = queries.get(qid)
    if entry:
        snapshot = {
            "status": entry["status"],
            "trace": tracer.get(qid),
            "result": entry["result"],
        }
        return jsonify(snapshot)
    return jsonify({"error": "Query not found."}), 404


if __name__ == "__main__":
    # Direct-run entry point: listen on all interfaces (0.0.0.0), port 7860,
    # with Flask debug mode off.
    app.run(host="0.0.0.0", port=7860, debug=False)