mnoorchenar committed on
Commit
5aa2260
·
1 Parent(s): f56271e

Update 2026-03-22 00:46:55

Browse files
.env.example ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Required ──────────────────────────────────────────────────────────────
2
+ # Free HuggingFace token (Read scope is sufficient)
3
+ # Get yours: https://huggingface.co/settings/tokens
4
+ HF_TOKEN=hf_your_token_here
5
+
6
+ # ── Optional ──────────────────────────────────────────────────────────────
7
+ # Flask session secret — replace the placeholder below with a long random string, or delete the line to have one auto-generated at startup
8
+ SECRET_KEY=change_me_to_a_random_string
9
+
10
+ # Embedding model (runs locally — no token required for this one)
11
+ EMBED_MODEL=BAAI/bge-small-en-v1.5
12
+
13
+ # Planner + Generator model (HF Inference API)
14
+ GENERATOR_MODEL=mistralai/Mistral-7B-Instruct-v0.3
15
+
16
+ # Grader + Critic model (HF Inference API)
17
+ CRITIC_MODEL=HuggingFaceH4/zephyr-7b-beta
Dockerfile CHANGED
@@ -1,7 +1,23 @@
1
- FROM python:3.11-slim
 
2
  WORKDIR /app
 
 
 
 
 
3
  COPY requirements.txt .
4
  RUN pip install --no-cache-dir -r requirements.txt
 
 
 
 
5
  COPY . .
 
 
 
 
6
  EXPOSE 7860
7
- CMD ["python", "app.py"]
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Keep the model caches under /app. The embedding model is downloaded at build
# time as root; by default that lands in root's home directory, which the
# non-root runtime user below does not use, so the model would be downloaded
# again on first request. /app is chown'ed to appuser further down.
ENV HF_HOME=/app/.cache/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/app/.cache/sentence_transformers

# System deps for faiss and sentence-transformers
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential libgomp1 && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the embedding model so first request is fast
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-small-en-v1.5')"

COPY . .

# Run as an unprivileged user; the chown also covers the model cache above
RUN useradd -m -u 1000 appuser && chown -R appuser /app
USER appuser

EXPOSE 7860
ENV PYTHONUNBUFFERED=1

# Single worker: the app keeps its index and query state in process memory
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--timeout", "180", "--keep-alive", "5", "app:app"]
README.md CHANGED
@@ -1,8 +1,229 @@
1
- ---
2
- title: docmind
3
- colorFrom: purple
4
- colorTo: blue
5
  sdk: docker
6
- app_port: 7860
7
- pinned: false
8
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DocMind-Agentic-Research
3
+ colorFrom: blue
4
+ colorTo: indigo
5
  sdk: docker
 
 
6
  ---
7
+
8
+ <div align="center">
9
+
10
+ <h1>🧠 DocMind — Agentic Research Platform</h1>
11
+ <img src="https://readme-typing-svg.demolab.com?font=Fira+Code&size=22&duration=3000&pause=1000&color=4f8ef7&center=true&vCenter=true&width=700&lines=LangGraph+%C2%B7+5+Agents+%C2%B7+Corrective+RAG;Multi-Agent+Orchestration+%C2%B7+Human-in-the-Loop;Deployed+Free+on+HuggingFace+Spaces" alt="Typing SVG"/>
12
+
13
+ <br/>
14
+
15
+ [![Python](https://img.shields.io/badge/Python-3.10+-3b82f6?style=for-the-badge&logo=python&logoColor=white)](https://www.python.org/)
16
+ [![LangGraph](https://img.shields.io/badge/LangGraph-0.2-06b6d4?style=for-the-badge)](https://github.com/langchain-ai/langgraph)
17
+ [![LangChain](https://img.shields.io/badge/LangChain-0.3-4f46e5?style=for-the-badge)](https://langchain.com/)
18
+ [![Flask](https://img.shields.io/badge/Flask-3.1-3b82f6?style=for-the-badge&logo=flask&logoColor=white)](https://flask.palletsprojects.com/)
19
+ [![Docker](https://img.shields.io/badge/Docker-Ready-3b82f6?style=for-the-badge&logo=docker&logoColor=white)](https://www.docker.com/)
20
+ [![HuggingFace](https://img.shields.io/badge/HuggingFace-Spaces-ffcc00?style=for-the-badge&logo=huggingface&logoColor=black)](https://huggingface.co/mnoorchenar/spaces)
21
+ [![Status](https://img.shields.io/badge/Status-Active-22c55e?style=for-the-badge)](#)
22
+
23
+ <br/>
24
+
25
+ **🧠 DocMind** — A production-grade agentic document research platform. Five specialized LangGraph agents plan, retrieve, grade, generate, and critique answers from uploaded PDFs using Corrective RAG, hybrid search, human-in-the-loop review, and LangSmith-style observability — all running free on HuggingFace Spaces.
26
+
27
+ <br/>
28
+
29
+ ---
30
+
31
+ </div>
32
+
33
+ ## Table of Contents
34
+ - [Features](#-features)
35
+ - [Architecture](#️-architecture)
36
+ - [Getting Started](#-getting-started)
37
+ - [Docker Deployment](#-docker-deployment)
38
+ - [Dashboard Modules](#-dashboard-modules)
39
+ - [ML Models](#-ml-models)
40
+ - [Project Structure](#-project-structure)
41
+ - [Author](#-author)
42
+ - [Contributing](#-contributing)
43
+ - [Disclaimer](#disclaimer)
44
+ - [License](#-license)
45
+
46
+ ---
47
+
48
+ ## ✨ Features
49
+
50
+ <table>
51
+ <tr><td>🧠 <b>LangGraph State Machine</b></td><td>Five agents wired into a cyclic StateGraph with conditional edges and Corrective RAG rewrite loops.</td></tr>
52
+ <tr><td>🔍 <b>Hybrid RAG (FAISS + BM25)</b></td><td>Semantic vector search combined with BM25 keyword search, fused via Reciprocal Rank Fusion for precision retrieval.</td></tr>
53
+ <tr><td>🤖 <b>Multi-Agent Orchestration</b></td><td>Planner, Retriever, Grader, Generator, and Critic agents each with specialized roles and distinct LLM temperature settings.</td></tr>
54
+ <tr><td>👁️ <b>Human-in-the-Loop</b></td><td>Answers failing the Critic agent's quality threshold are routed to a human review queue before delivery.</td></tr>
55
+ <tr><td>📊 <b>Observability Dashboard</b></td><td>Per-agent call counts, average latency, and Chart.js visualizations — LangSmith-style tracing without the paid tier.</td></tr>
56
+ <tr><td>🔧 <b>Tool Use / Function Calling</b></td><td>Three real tools: DuckDuckGo web search, safe AST calculator, and sandboxed Python code execution.</td></tr>
57
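+ <tr><td>🔒 <b>Secure by Design</b></td><td>REST backend with no database — the index and query state live in memory and uploads in the container's /tmp, so no user data outlives the container; sandboxed code tool with restricted builtins only.</td></tr>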
58
+ <tr><td>🐳 <b>Containerized Deployment</b></td><td>Docker-first with Gunicorn, embedding model pre-downloaded at build time for fast cold starts.</td></tr>
59
+ </table>
60
+
61
+ ---
62
+
63
+ ## 🏗️ Architecture
64
+
65
+ ```
66
+ ┌──────────────────────────────────────────────────────────────┐
67
+ │ DocMind — LangGraph Flow │
68
+ │ │
69
+ │ PDF Upload ──▶ Ingestor ──▶ FAISS+BM25 Hybrid Vector Store │
70
+ │ │ │
71
+ │ User Query ──▶ [PLANNER Agent] │ │
72
+ │ │ │ │
73
+ │ [RETRIEVER] ◀──────┘ (hybrid search) │
74
+ │ │ │
75
+ │ [GRADER] ──▶ low score? ──▶ [REWRITER] ──┐ │
76
+ │ │ │ │
77
+ │ └──▶ [GENERATOR] ◀──────────────────┘ │
78
+ │ │ │
79
+ │ [CRITIC] ──▶ flag? ──▶ [REVIEW] │
80
+ │ │ │
81
+ │ [OUTPUT] Flask API + SPA UI │
82
+ └──────────────────────────────────────────────────────────────┘
83
+ ```
84
+
85
+ ---
86
+
87
+ ## 🚀 Getting Started
88
+
89
+ ### Prerequisites
90
+ - Python 3.10+ · Docker · Git · Free HuggingFace account
91
+
92
+ ### Local Installation
93
+
94
+ ```bash
95
+ git clone https://github.com/mnoorchenar/docmind.git
96
+ cd docmind
97
+
98
+ python -m venv venv
99
+ source venv/bin/activate # Windows: venv\Scripts\activate
100
+
101
+ pip install -r requirements.txt
102
+
103
+ cp .env.example .env
104
+ # Edit .env — set HF_TOKEN to your free HuggingFace Read token
105
+
106
+ python app.py
107
+ ```
108
+
109
+ Open `http://localhost:7860` 🎉
110
+
111
+ ### Getting your free HuggingFace token
112
+ 1. Create a free account at [huggingface.co](https://huggingface.co)
113
+ 2. Go to Settings → Access Tokens → New Token → Role: **Read**
114
+ 3. Copy the token and set it as `HF_TOKEN` in your `.env` file or Space secrets
115
+
116
+ ---
117
+
118
+ ## 🐳 Docker Deployment
119
+
120
+ ```bash
121
+ docker build -t docmind .
122
+ docker run -p 7860:7860 -e HF_TOKEN=hf_your_token_here docmind
123
+ ```
124
+
125
+ ---
126
+
127
+ ## 📊 Dashboard Modules
128
+
129
+ | Module | Description | Status |
130
+ |--------|-------------|--------|
131
+ | 📤 Upload & Index | PDF ingest, chunk, embed (local), FAISS+BM25 index | ✅ Live |
132
+ | 🔍 Research Query | LangGraph 5-agent pipeline with real-time trace | ✅ Live |
133
+ | 👁️ Human Review | Critic escalation queue with approve/reject | ✅ Live |
134
+ | 📊 Observability | Per-agent latency, call counts, Chart.js dashboard | ✅ Live |
135
+ | 🔧 Tool Playground | Web search, calculator, code runner | ✅ Live |
136
+
137
+ ---
138
+
139
+ ## 🧠 ML Models
140
+
141
+ ```python
142
+ models = {
143
+ "planner_generator": "mistralai/Mistral-7B-Instruct-v0.3",
144
+ "grader_critic": "HuggingFaceH4/zephyr-7b-beta",
145
+ "embeddings": "BAAI/bge-small-en-v1.5",
146
+ "vector_index": "FAISS (faiss-cpu, local)",
147
+ "keyword_index": "BM25 (rank-bm25, local)",
148
+ "fusion_strategy": "Reciprocal Rank Fusion (RRF k=60)",
149
+ "graph_framework": "LangGraph 0.2 StateGraph",
150
+ "chain_syntax": "LangChain LCEL (prompt | llm)",
151
+ }
152
+ ```
153
+
154
+ ---
155
+
156
+ ## 📁 Project Structure
157
+
158
+ ```
159
+ docmind/
160
+ ├── 📄 app.py # Flask entry point, 10 REST routes
161
+ ├── 📄 requirements.txt
162
+ ├── 📄 Dockerfile # Port 7860, embedding model pre-downloaded
163
+ ├── 📄 .env.example
164
+ ├── 📂 agents/
165
+ │ ├── 📄 planner.py # Mistral-7B — task decomposition
166
+ │ ├── 📄 retriever.py # Hybrid FAISS+BM25 search wrapper
167
+ │ ├── 📄 grader.py # Zephyr-7B — 0.0–1.0 relevance scoring
168
+ │ ├── 📄 generator.py # Mistral-7B — cited answer generation
169
+ │ └── 📄 critic.py # Zephyr-7B — hallucination detection
170
+ ├── 📂 graph/
171
+ │ └── 📄 research_graph.py # LangGraph StateGraph (5 nodes + conditional edges)
172
+ ├── 📂 rag/
173
+ │ ├── 📄 ingestor.py # PyPDF + overlapping chunker
174
+ │ ├── 📄 vector_store.py # FAISS + BM25 + RRF fusion
175
+ │ └── 📄 embeddings.py # sentence-transformers local wrapper
176
+ ├── 📂 tools/
177
+ │ ├── 📄 web_search.py # DuckDuckGo free search
178
+ │ ├── 📄 calculator.py # AST-safe math evaluator
179
+ │ └── 📄 code_tool.py # Sandboxed Python exec
180
+ ├── 📂 tracing/
181
+ │ └── 📄 tracer.py # Thread-safe in-memory trace store
182
+ ├── 📂 templates/
183
+ │ └── 📄 index.html # Dark-mode 5-page SPA
184
+ └── 📂 docs/
185
+ └── 📄 project-template.html # Portfolio showcase page
186
+ ```
187
+
188
+ ---
189
+
190
+ ## 👨‍💻 Author
191
+
192
+ <div align="center">
193
+ <table><tr><td align="center" width="100%">
194
+ <img src="https://avatars.githubusercontent.com/mnoorchenar" width="120" style="border-radius:50%;border:3px solid #4f46e5" alt="Mohammad Noorchenarboo"/>
195
+ <h3>Mohammad Noorchenarboo</h3>
196
+ <code>Data Scientist</code> &nbsp;|&nbsp; <code>AI Researcher</code> &nbsp;|&nbsp; <code>Biostatistician</code>
197
+ 📍 Ontario, Canada &nbsp;&nbsp; 📧 mohammadnoorchenarboo@gmail.com
198
+
199
+ [![LinkedIn](https://img.shields.io/badge/LinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/mnoorchenar)
200
+ [![HuggingFace](https://img.shields.io/badge/HuggingFace-ffcc00?style=for-the-badge&logo=huggingface&logoColor=black)](https://huggingface.co/mnoorchenar/spaces)
201
+ [![GitHub](https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white)](https://github.com/mnoorchenar)
202
+ </td></tr></table>
203
+ </div>
204
+
205
+ ---
206
+
207
+ ## 🤝 Contributing
208
+
209
+ 1. Fork the repository
210
+ 2. Create a feature branch: `git checkout -b feature/amazing-feature`
211
+ 3. Commit: `git commit -m 'Add amazing feature'`
212
+ 4. Push: `git push origin feature/amazing-feature`
213
+ 5. Open a Pull Request
214
+
215
+ ---
216
+
217
+ ## Disclaimer
218
+
219
+ <span style="color:red">This project is developed strictly for educational and research purposes. All LLM outputs are AI-generated and may contain inaccuracies. No real user data is stored. Provided "as is" without warranty of any kind.</span>
220
+
221
+ ---
222
+
223
+ ## 📜 License
224
+
225
+ Distributed under the **MIT License**.
226
+
227
+ <div align="center">
228
+ <img src="https://capsule-render.vercel.app/api?type=waving&color=0:3b82f6,100:4f46e5&height=120&section=footer&text=Made%20with%20%E2%9D%A4%EF%B8%8F%20by%20Mohammad%20Noorchenarboo&fontColor=ffffff&fontSize=18&fontAlignY=80" width="100%"/>
229
+ </div>
agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # (empty)
agents/critic.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re
2
+ from langchain_huggingface import HuggingFaceEndpoint
3
+ from langchain.prompts import PromptTemplate
4
+
5
+ _TEMPLATE = """You are a strict quality-control critic. Evaluate this answer for accuracy and grounding.
6
+ Output EXACTLY one of these two lines first, then a one-sentence explanation:
7
+ VERDICT: APPROVED
8
+ VERDICT: NEEDS_REVIEW
9
+
10
+ Criteria for NEEDS_REVIEW: answer contains claims not in the context, is incomplete, or is incoherent.
11
+
12
+ Question: {question}
13
+ Context (first 1500 chars): {context}
14
+ Answer: {answer}
15
+
16
+ Evaluation:"""
17
+
18
def run_critic(question: str, answer: str, documents: list) -> dict:
    """Ask the critic LLM whether ``answer`` is grounded in ``documents``.

    Args:
        question: The user's original question.
        answer: The generated answer to evaluate.
        documents: Dicts that each carry a ``"page_content"`` string; their
            text is joined and truncated to 1500 characters for the prompt.

    Returns:
        ``{"verdict": "APPROVED" | "NEEDS_REVIEW", "explanation": str}`` where
        the explanation is capped at 300 characters.
    """
    context = " ".join(d["page_content"] for d in documents)[:1500]
    llm = HuggingFaceEndpoint(
        # Honour the CRITIC_MODEL setting documented in .env.example.
        repo_id=os.getenv("CRITIC_MODEL") or "HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        max_new_tokens=150,
        temperature=0.1,
        huggingfacehub_api_token=os.getenv("HF_TOKEN", ""),
        timeout=60,
    )
    chain = PromptTemplate(input_variables=["question", "context", "answer"], template=_TEMPLATE) | llm
    result = chain.invoke({"question": question, "context": context, "answer": answer})
    raw = result.strip() if isinstance(result, str) else str(result).strip()

    # Trust explicit "VERDICT: ..." markers first, so an explanation that
    # merely mentions NEEDS_REVIEW cannot flip an explicit APPROVED verdict.
    # If the model emits several markers, any NEEDS_REVIEW wins (fail safe).
    marked = [
        m.upper().replace(" ", "_")
        for m in re.findall(r"VERDICT:\s*(APPROVED|NEEDS[_ ]REVIEW)", raw, re.IGNORECASE)
    ]
    if marked:
        verdict = "NEEDS_REVIEW" if "NEEDS_REVIEW" in marked else "APPROVED"
    elif re.search(r"NEEDS_REVIEW", raw, re.IGNORECASE):
        # No marker at all: fall back to the original keyword scan.
        verdict = "NEEDS_REVIEW"
    else:
        # NOTE(review): unparseable output is treated as APPROVED, as before;
        # confirm whether the quality gate should fail closed instead.
        verdict = "APPROVED"

    # Everything after the first line is taken as the explanation.
    explanation = raw.split("\n", 1)[-1].strip() if "\n" in raw else raw
    return {"verdict": verdict, "explanation": explanation[:300]}
agents/generator.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_huggingface import HuggingFaceEndpoint
3
+ from langchain.prompts import PromptTemplate
4
+
5
+ _TEMPLATE = """You are an expert research analyst. Answer the question using ONLY the context below.
6
+ Cite sources as [Source: filename, p.N] inline. If the context lacks enough information, say so clearly.
7
+
8
+ Context:
9
+ {context}
10
+
11
+ Question: {question}
12
+
13
+ Answer:"""
14
+
15
def run_generator(question: str, documents: list) -> str:
    """Generate a cited answer to ``question`` from the retrieved documents.

    Args:
        question: The user's question.
        documents: Dicts with a required ``"page_content"`` key and optional
            ``"source"`` / ``"page"`` keys, used to label each excerpt.

    Returns:
        The model's answer with surrounding whitespace stripped.
    """
    # Prefix every excerpt with its citation tag so the model can quote it.
    context_parts = [
        f"[Source: {d.get('source', 'unknown')}, p.{d.get('page', '?')}]\n{d['page_content']}"
        for d in documents
    ]
    context = "\n\n".join(context_parts) if context_parts else "No context available."

    llm = HuggingFaceEndpoint(
        # Honour the GENERATOR_MODEL setting documented in .env.example.
        repo_id=os.getenv("GENERATOR_MODEL") or "mistralai/Mistral-7B-Instruct-v0.3",
        task="text-generation",
        max_new_tokens=512,
        temperature=0.4,
        huggingfacehub_api_token=os.getenv("HF_TOKEN", ""),
        timeout=90,
    )
    chain = PromptTemplate(input_variables=["question", "context"], template=_TEMPLATE) | llm
    result = chain.invoke({"question": question, "context": context})
    return result.strip() if isinstance(result, str) else str(result).strip()
agents/grader.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re
2
+ from langchain_huggingface import HuggingFaceEndpoint
3
+ from langchain.prompts import PromptTemplate
4
+
5
+ _TEMPLATE = """You are a document relevance grader. Rate how relevant this document is to the question.
6
+ Respond with ONLY a decimal number between 0.0 (irrelevant) and 1.0 (highly relevant). Nothing else.
7
+
8
+ Question: {question}
9
+ Document excerpt: {document}
10
+
11
+ Relevance score:"""
12
+
13
def grade_document(question: str, document: str) -> float:
    """Score how relevant ``document`` is to ``question`` on a 0.0-1.0 scale.

    Args:
        question: The user's question.
        document: Document text; only the first 800 characters are sent.

    Returns:
        The first number found in the model's reply, clamped to [0.0, 1.0],
        or 0.5 when the reply contains no number.
    """
    llm = HuggingFaceEndpoint(
        # Honour the CRITIC_MODEL setting documented in .env.example.
        repo_id=os.getenv("CRITIC_MODEL") or "HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        max_new_tokens=10,
        temperature=0.05,
        huggingfacehub_api_token=os.getenv("HF_TOKEN", ""),
        timeout=45,
    )
    chain = PromptTemplate(input_variables=["question", "document"], template=_TEMPLATE) | llm
    result = chain.invoke({"question": question, "document": document[:800]})
    raw = result.strip() if isinstance(result, str) else str(result).strip()

    # Allow a missing leading zero: ".8" must parse as 0.8, not as "8"
    # (which would then be clamped to a perfect 1.0).
    match = re.search(r"\d*\.?\d+", raw)
    if match is None:
        return 0.5  # neutral score when the model gave no number
    return max(0.0, min(float(match.group()), 1.0))
27
+
28
def run_grader(question: str, documents: list) -> list:
    """Grade every document against ``question``.

    Returns a new list of copies of the input dicts, each extended with a
    ``"grade"`` float; the input dicts themselves are left untouched.
    """
    return [
        {**entry, "grade": grade_document(question, entry["page_content"])}
        for entry in documents
    ]
agents/planner.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time
2
+ from langchain_huggingface import HuggingFaceEndpoint
3
+ from langchain.prompts import PromptTemplate
4
+
5
+ _TEMPLATE = """You are a research planning agent. Given the user's question, produce a brief research plan.
6
+ Decide: should the answer be grounded in uploaded documents, web search, or both?
7
+ Output your plan in 2-3 concise sentences. Start with "PLAN:".
8
+
9
+ Question: {question}
10
+
11
+ Plan:"""
12
+
13
def run_planner(question: str) -> str:
    """Ask the planner LLM for a short research plan for ``question``.

    Args:
        question: The user's question.

    Returns:
        The model's plan text with surrounding whitespace stripped.
    """
    llm = HuggingFaceEndpoint(
        # Honour the GENERATOR_MODEL setting documented in .env.example
        # (the planner shares the generator's model).
        repo_id=os.getenv("GENERATOR_MODEL") or "mistralai/Mistral-7B-Instruct-v0.3",
        task="text-generation",
        max_new_tokens=200,
        temperature=0.3,
        huggingfacehub_api_token=os.getenv("HF_TOKEN", ""),
        timeout=60,
    )
    chain = PromptTemplate(input_variables=["question"], template=_TEMPLATE) | llm
    result = chain.invoke({"question": question})
    return result.strip() if isinstance(result, str) else str(result).strip()
agents/retriever.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
def run_retriever(question: str, vector_store, k: int = 5) -> list:
    """Delegate to ``vector_store.hybrid_search`` and return its hits unchanged.

    Per the store's contract, each hit is expected to be a dict with the keys
    page_content, source, page and score.
    """
    hits = vector_store.hybrid_search(question, k=k)
    return hits
app.py CHANGED
@@ -1,12 +1,161 @@
1
- from flask import Flask, render_template_string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  app = Flask(__name__)
3
- HTML = """<!DOCTYPE html>
4
- <html><head><title>docmind</title></head>
5
- <body style="font-family:Arial;max-width:800px;margin:50px auto;padding:20px">
6
- <h1>docmind</h1>
7
- <p>Running on port 7860.</p>
8
- <span style="background:#28a745;color:#fff;padding:5px 15px;border-radius:15px">Running</span>
9
- </body></html>"""
10
- @app.route('/')
11
- def home(): return render_template_string(HTML)
12
- if __name__ == '__main__': app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, uuid, threading
2
+ from flask import Flask, render_template, request, jsonify
3
+ from werkzeug.utils import secure_filename
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ from rag.vector_store import HybridVectorStore
9
+ from rag.ingestor import PDFIngestor
10
+ from graph.research_graph import ResearchGraph
11
+ from tracing.tracer import Tracer
12
+ from tools.web_search import web_search
13
+ from tools.calculator import calculate
14
+ from tools.code_tool import run_code
15
+
16
  app = Flask(__name__)
17
+ app.secret_key = os.getenv("SECRET_KEY", os.urandom(24).hex())  # random per-process key when SECRET_KEY is unset
18
+
19
+ UPLOAD_FOLDER = "/tmp/docmind_uploads"  # uploaded PDFs are saved here before ingestion
20
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
21
+
22
+ # ── Global singletons (in-memory, scoped to container lifetime) ───────────
23
+ vector_store = HybridVectorStore()
24
+ tracer = Tracer()
25
+ graph = ResearchGraph(vector_store, tracer)
26
+ queries = {} # query_id -> {"status": ..., "result": ...}; written by /api/research and its worker thread
27
+
28
+
29
+ # ── ROUTES ────────────────────────────────────────────────────────────────
30
+
31
@app.route("/")
def index():
    """Serve the single-page UI."""
    page = render_template("index.html")
    return page
34
+
35
+
36
@app.route("/health")
def health():
    """Report liveness, index size, and whether an HF token is configured."""
    payload = {
        "status": "ok",
        "docs_indexed": vector_store.doc_count,
        "chunks_stored": vector_store.chunk_count,
        # Only expose whether the token exists, never its value.
        "token_set": bool(os.getenv("HF_TOKEN")),
    }
    return jsonify(payload)
44
+
45
+
46
@app.route("/api/upload", methods=["POST"])
def upload():
    """Accept a PDF upload, chunk it, and add the chunks to the vector store.

    Expects a multipart form field named ``file``. Returns 400 for a missing
    or non-PDF file, 500 with the error text if saving or ingestion fails, and
    otherwise a JSON summary of the chunks added and the index totals.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file attached."}), 400
    f = request.files["file"]
    # filename may be None or empty when the form field was left blank.
    original_name = f.filename or ""
    if not original_name.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files are supported."}), 400

    # secure_filename drops non-ASCII characters, which can swallow the stem
    # and the dot (e.g. "文档.pdf" -> "pdf"); fall back to a generated name
    # so the saved file always keeps its .pdf extension.
    safe_name = secure_filename(original_name)
    if not safe_name.lower().endswith(".pdf"):
        safe_name = f"upload-{uuid.uuid4().hex[:8]}.pdf"
    path = os.path.join(UPLOAD_FOLDER, safe_name)

    try:
        # Saving is inside the try so disk errors also yield a JSON response.
        f.save(path)
        chunks = PDFIngestor().ingest(path)
        vector_store.add_documents(chunks)
        return jsonify({
            "success": True,
            "filename": f.filename,
            "chunks": len(chunks),
            "total_chunks": vector_store.chunk_count,
            "total_docs": vector_store.doc_count,
        })
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
67
+
68
+
69
@app.route("/api/research", methods=["POST"])
def research():
    """Start the research graph for a question on a background thread.

    Expects a JSON body ``{"question": "..."}``. Returns 400 when the question
    is missing or no documents are indexed; otherwise returns
    ``{"query_id": ...}`` immediately, and the caller polls
    ``/api/trace/<query_id>`` for progress and the result.
    """
    # silent=True: a missing, malformed, or non-JSON body becomes None rather
    # than an exception, so the client gets the 400 below.
    data = request.get_json(silent=True)
    raw_question = data.get("question") if isinstance(data, dict) else None
    question = raw_question.strip() if isinstance(raw_question, str) else ""
    if not question:
        return jsonify({"error": "Question is required."}), 400
    if vector_store.doc_count == 0:
        return jsonify({"error": "No documents indexed yet — please upload a PDF first."}), 400

    qid = str(uuid.uuid4())
    entry = {"status": "running", "result": None}
    queries[qid] = entry

    def _run():
        try:
            result = graph.run(question, qid)
            # Set the result before the status so a reader that sees a final
            # status always finds the result already in place.
            entry["result"] = result
            entry["status"] = "pending_review" if result.get("needs_human_review") else "complete"
        except Exception as exc:
            entry["status"] = "error"
            entry["result"] = {"error": str(exc)}

    threading.Thread(target=_run, daemon=True).start()
    return jsonify({"query_id": qid})
92
+
93
+
94
@app.route("/api/trace/<qid>")
def get_trace(qid):
    """Return the status, trace, and result recorded for one query id."""
    entry = queries.get(qid)
    if entry:
        body = {
            "status": entry["status"],
            "trace": tracer.get(qid),
            "result": entry["result"],
        }
        return jsonify(body)
    return jsonify({"error": "Query not found."}), 404
100
+
101
+
102
@app.route("/api/review")
def review_queue():
    """List queries whose answers are waiting for human review.

    Returns ``{"pending": [...]}`` where each item carries the query id,
    question, generated answer, and the critic's critique.
    """
    pending = []
    # Iterate over a snapshot: other requests may add queries concurrently
    # (e.g. under the threaded dev server), and resizing a dict during
    # iteration raises RuntimeError.
    for qid, q in list(queries.items()):
        result = q["result"]
        if q["status"] != "pending_review" or not result:
            continue
        pending.append({
            "query_id": qid,
            "question": result.get("question", ""),
            "generation": result.get("generation", ""),
            "critique": result.get("critique", ""),
        })
    return jsonify({"pending": pending})
113
+
114
+
115
@app.route("/api/review/<qid>", methods=["POST"])
def review_action(qid):
    """Record a reviewer's approve/reject decision for a pending query.

    Expects a JSON body ``{"action": "approve" | "reject"}``. Returns 404 for
    an unknown query id, 400 for an invalid action, and 409 when the query is
    not currently awaiting review.
    """
    # silent=True: a malformed or non-JSON body becomes None, not an exception.
    data = request.get_json(silent=True)
    action = data.get("action") if isinstance(data, dict) else None
    entry = queries.get(qid)
    if entry is None:
        return jsonify({"error": "Query not found."}), 404
    if action not in ("approve", "reject"):
        return jsonify({"error": "Action must be 'approve' or 'reject'."}), 400
    # Only pending answers may be decided. Marking a still-running query would
    # be overwritten when its worker finishes, and a decided one must not be
    # silently flipped.
    if entry["status"] != "pending_review":
        return jsonify({"error": "Query is not awaiting review."}), 409

    approved = action == "approve"
    entry["status"] = "complete" if approved else "rejected"
    if entry["result"]:
        entry["result"]["human_approved"] = approved
    # Spell out the past tense: appending "d" to the action gave "rejectd".
    outcome = "approved" if approved else "rejected"
    tracer.add(qid, "human_review", f"Reviewer {outcome} this answer.", "complete", 0)
    return jsonify({"success": True})
128
+
129
+
130
@app.route("/api/observability")
def observability():
    """Return the tracer's aggregated statistics as JSON."""
    summary = tracer.stats()
    return jsonify(summary)
133
+
134
+
135
@app.route("/api/tool/<name>", methods=["POST"])
def tool_run(name):
    """Run one of the playground tools on the posted input string.

    Expects a JSON body ``{"input": "..."}``. Returns 400 for an unknown tool
    or empty input, 500 with the error text if the tool raises, and otherwise
    ``{"result": ...}``.
    """
    tools = {"web_search": web_search, "calculator": calculate, "code": run_code}
    # Look the tool up explicitly, so a tool that legitimately returns None
    # is not misreported as "unknown".
    tool = tools.get(name)
    if tool is None:
        return jsonify({"error": f"Unknown tool '{name}'."}), 400

    # silent=True: a malformed or non-JSON body becomes None, not an exception.
    data = request.get_json(silent=True)
    raw_input = data.get("input") if isinstance(data, dict) else None
    inp = raw_input.strip() if isinstance(raw_input, str) else ""
    if not inp:
        return jsonify({"error": "Input is required."}), 400

    # NOTE(review): "code" runs caller-supplied Python via run_code; this route
    # relies entirely on that tool's sandbox — confirm it is adequate.
    try:
        return jsonify({"result": tool(inp)})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
147
+
148
+
149
@app.route("/api/stats")
def stats():
    """Return headline counters for the index and the query log."""
    finished = 0
    awaiting_review = 0
    # Tally both statuses in one pass over the query log.
    for record in queries.values():
        if record["status"] == "complete":
            finished += 1
        elif record["status"] == "pending_review":
            awaiting_review += 1

    summary = {
        "docs_indexed": vector_store.doc_count,
        "chunks_stored": vector_store.chunk_count,
        "queries_run": len(queries),
        "queries_complete": finished,
        "pending_review": awaiting_review,
    }
    return jsonify(summary)
158
+
159
+
160
+ if __name__ == "__main__":
161
+ app.run(host="0.0.0.0", port=7860, debug=False)
code.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/.gitkeep ADDED
File without changes
data/raw/.gitkeep ADDED
File without changes
docs/project-template.html ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
5
+ <title>DocMind · Mohammad Noorchenarboo</title>
6
+ <script>document.documentElement.setAttribute('data-theme',localStorage.getItem('mn-theme')||'dark')</script>
7
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 64 64'%3E%3Cdefs%3E%3ClinearGradient id='g' x1='0%25' y1='0%25' x2='100%25' y2='100%25'%3E%3Cstop offset='0%25' stop-color='%234f8ef7'/%3E%3Cstop offset='100%25' stop-color='%2306b6d4'/%3E%3C/linearGradient%3E%3C/defs%3E%3Crect width='64' height='64' rx='14' fill='%23070d1f'/%3E%3Ctext x='50%25' y='50%25' dominant-baseline='central' text-anchor='middle' font-family='Segoe UI,system-ui,sans-serif' font-weight='900' font-size='26' fill='url(%23g)'%3EMN%3C/text%3E%3C/svg%3E">
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.0/chart.umd.min.js"></script>
10
+ <style>
11
+ :root{--accent:#4f8ef7;--gold:#f59e0b;--teal:#06b6d4;--green:#22c55e;--radius:14px;--body-bg:#070d1f;--text:#e2e8f0;--muted:#8892a4;--glass:rgba(255,255,255,.04);--glass-border:rgba(255,255,255,.08);--card-hover-bg:rgba(255,255,255,.07);--card-hover-border:rgba(79,142,247,.3);--section-alt:#0b1120}
12
+ [data-theme="light"]{--body-bg:#f8fafc;--text:#0f172a;--muted:#4b5675;--glass:rgba(0,0,0,.03);--glass-border:rgba(0,0,0,.08);--card-hover-bg:rgba(0,0,0,.05);--card-hover-border:rgba(37,99,235,.25);--section-alt:#f1f5f9}
13
+ *{box-sizing:border-box;margin:0;padding:0} body{font-family:'Segoe UI',system-ui,sans-serif;background:var(--body-bg);color:var(--text);transition:background .35s,color .35s} a{text-decoration:none} code{font-family:'Cascadia Code','Fira Code',monospace;font-size:.88em;background:rgba(79,142,247,.1);padding:1px 5px;border-radius:4px}
14
+ .s-tag{display:inline-block;font-size:.7rem;font-weight:800;text-transform:uppercase;letter-spacing:.1em;padding:3px 10px;border-radius:6px;margin-bottom:10px}
15
+ .s-tag-blue{background:rgba(79,142,247,.12);color:var(--accent);border:1px solid rgba(79,142,247,.2)}
16
+ .s-tag-gold{background:rgba(245,158,11,.12);color:var(--gold);border:1px solid rgba(245,158,11,.2)}
17
+ .s-tag-teal{background:rgba(6,182,212,.12);color:var(--teal);border:1px solid rgba(6,182,212,.2)}
18
+ .grad-text{background:linear-gradient(135deg,var(--accent),var(--gold));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}
19
+ .hero{padding:80px 24px 56px;background:var(--body-bg);position:relative;overflow:hidden;transition:background .35s}
20
+ .hero::before{content:'';position:absolute;inset:0;pointer-events:none;background:radial-gradient(ellipse 80% 55% at 50% -10%,rgba(79,142,247,.15) 0%,transparent 65%)}
21
+ .hero::after{content:'';position:absolute;inset:0;pointer-events:none;background-image:linear-gradient(rgba(79,142,247,.035) 1px,transparent 1px),linear-gradient(90deg,rgba(79,142,247,.035) 1px,transparent 1px);background-size:48px 48px}
22
+ .hero-inner{max-width:1100px;margin:0 auto;position:relative;z-index:1}
23
+ .breadcrumb{font-size:.78rem;color:var(--muted);margin-bottom:18px;display:flex;align-items:center;gap:8px;flex-wrap:wrap}
24
+ .breadcrumb a{color:var(--muted);transition:.2s} .breadcrumb a:hover{color:var(--accent)} .breadcrumb span{opacity:.4}
25
+ .tag-row{display:flex;align-items:center;gap:10px;margin-bottom:18px;flex-wrap:wrap}
26
+ .pill{display:inline-flex;align-items:center;gap:6px;padding:5px 14px;border-radius:20px;font-size:.75rem;font-weight:700;letter-spacing:.04em}
27
+ .pill-blue{background:rgba(79,142,247,.12);border:1px solid rgba(79,142,247,.25);color:var(--accent)}
28
+ .pill-gold{background:rgba(245,158,11,.12);border:1px solid rgba(245,158,11,.25);color:var(--gold)}
29
+ .pill-teal{background:rgba(6,182,212,.12);border:1px solid rgba(6,182,212,.25);color:var(--teal)}
30
+ h1{font-size:clamp(1.7rem,3.5vw,2.7rem);font-weight:900;line-height:1.2;margin-bottom:20px;max-width:820px;color:var(--text)}
31
+ .hero-sub{font-size:1rem;color:var(--muted);max-width:680px;margin-bottom:28px;line-height:1.65} .hero-sub strong{color:var(--text)}
32
+ .hero-meta{display:flex;gap:16px;flex-wrap:wrap;align-items:center;margin-bottom:24px;font-size:.83rem;color:var(--muted)}
33
+ .hero-meta span{display:flex;align-items:center;gap:6px} .hero-meta i{color:var(--accent)}
34
+ .hero-actions{display:flex;gap:10px;flex-wrap:wrap}
35
+ .btn{display:inline-flex;align-items:center;gap:8px;padding:9px 20px;border-radius:8px;font-size:.85rem;font-weight:600;cursor:pointer;border:1px solid transparent;transition:all .2s;font-family:inherit;text-decoration:none}
36
+ .btn-blue{background:rgba(79,142,247,.18);color:var(--accent);border-color:rgba(79,142,247,.35)} .btn-blue:hover{background:rgba(79,142,247,.3);transform:translateY(-2px)}
37
+ .btn-gold{background:rgba(245,158,11,.15);color:var(--gold);border-color:rgba(245,158,11,.35)} .btn-gold:hover{background:rgba(245,158,11,.28);transform:translateY(-2px)}
38
+ .btn-gray{background:var(--glass);color:var(--text);border-color:var(--glass-border)} .btn-gray:hover{background:var(--card-hover-bg);transform:translateY(-2px)}
39
+ .btn-back{background:var(--glass);color:var(--muted);border-color:var(--glass-border)} .btn-back:hover{color:var(--accent);border-color:var(--card-hover-border);transform:translateY(-2px)}
40
+ .stats-bar{background:var(--section-alt);border-top:1px solid var(--glass-border);border-bottom:1px solid var(--glass-border);transition:background .35s}
41
+ .stats-inner{max-width:1100px;margin:0 auto;display:grid;grid-template-columns:repeat(5,1fr);gap:1px;background:var(--glass-border)}
42
+ .stat-item{background:var(--section-alt);padding:22px 16px;text-align:center;transition:background .35s}
43
+ .stat-val{font-size:1.8rem;font-weight:900;background:linear-gradient(135deg,var(--accent),var(--gold));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text;line-height:1.1;margin-bottom:4px}
44
+ .stat-label{font-size:.75rem;color:var(--muted);line-height:1.4}
45
+ .main-layout{max-width:1100px;margin:0 auto;padding:48px 24px;display:grid;grid-template-columns:1fr 310px;gap:32px;align-items:start}
46
+ .content-col{display:flex;flex-direction:column;gap:28px} .sidebar{position:sticky;top:80px;display:flex;flex-direction:column;gap:20px}
47
+ .card{background:var(--glass);border:1px solid var(--glass-border);border-radius:var(--radius);padding:28px;transition:all .25s}
48
+ .card:hover{background:var(--card-hover-bg);border-color:var(--card-hover-border);transform:translateY(-3px)}
49
+ .card-title{font-size:1rem;font-weight:800;margin-bottom:18px;color:var(--text);display:flex;align-items:center;gap:10px} .card-title i{color:var(--accent);font-size:.9rem}
50
+ .narrative{font-size:.92rem;color:var(--muted);margin-bottom:10px;line-height:1.7} .narrative strong{color:var(--text)}
51
+ .pipeline{display:flex;align-items:stretch;gap:0;margin:20px 0;overflow-x:auto;padding-bottom:4px}
52
+ .pipe-step{flex:1;min-width:110px;background:var(--glass);border:1px solid var(--glass-border);border-radius:10px;padding:14px 8px;text-align:center;transition:.25s}
53
+ .pipe-step:hover{background:var(--card-hover-bg);border-color:var(--card-hover-border);transform:translateY(-3px)}
54
+ .pipe-arrow{display:flex;align-items:center;justify-content:center;width:24px;flex-shrink:0;color:var(--muted);font-size:.8rem;padding-top:8px}
55
+ .pipe-icon{font-size:1.6rem;margin-bottom:6px;line-height:1} .pipe-label{font-size:.72rem;font-weight:700;color:var(--text);margin-bottom:3px} .pipe-sub{font-size:.67rem;color:var(--muted);line-height:1.4}
56
+ .module-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px;margin:16px 0}
57
+ .mod-card{border-radius:12px;padding:20px;border:1px solid;transition:.25s} .mod-card:hover{transform:translateY(-3px)}
58
+ .mod-1{background:rgba(79,142,247,.05);border-color:rgba(79,142,247,.2)} .mod-2{background:rgba(239,68,68,.05);border-color:rgba(239,68,68,.18)}
59
+ .mod-3{background:rgba(245,158,11,.05);border-color:rgba(245,158,11,.18)} .mod-4{background:rgba(6,182,212,.05);border-color:rgba(6,182,212,.18)}
60
+ .mod-5{background:rgba(167,139,250,.05);border-color:rgba(167,139,250,.2)} .mod-6{background:rgba(34,197,94,.05);border-color:rgba(34,197,94,.18)}
61
+ .mod-badge{display:inline-flex;align-items:center;gap:6px;font-size:.72rem;font-weight:700;padding:3px 10px;border-radius:8px;margin-bottom:8px}
62
+ .mod-name{font-size:.93rem;font-weight:800;margin-bottom:5px;color:var(--text)} .mod-desc{font-size:.77rem;color:var(--muted);line-height:1.5;margin-bottom:10px}
63
+ .mod-detail{display:flex;justify-content:space-between;align-items:center;padding:4px 0;border-bottom:1px solid var(--glass-border);font-size:.77rem} .mod-detail:last-child{border-bottom:none} .mod-detail-key{color:var(--muted)}
64
+ .insight-banner{background:linear-gradient(135deg,rgba(79,142,247,.07),rgba(245,158,11,.07));border:1px solid rgba(79,142,247,.22);border-radius:var(--radius);padding:22px;margin-top:8px;display:flex;gap:16px;align-items:flex-start}
65
+ .insight-icon{font-size:2rem;flex-shrink:0} .insight-body h4{font-size:.95rem;font-weight:800;color:var(--text);margin-bottom:5px} .insight-body p{font-size:.85rem;color:var(--muted);line-height:1.6} .insight-body strong{color:var(--accent)}
66
+ .item-stack{display:flex;flex-direction:column;gap:8px;margin:14px 0}
67
+ .item-row{display:flex;align-items:center;gap:12px;padding:10px 14px;background:var(--glass);border:1px solid var(--glass-border);border-radius:8px;font-size:.82rem;transition:.2s} .item-row:hover{background:var(--card-hover-bg)}
68
+ .item-icon{width:32px;height:32px;border-radius:8px;display:flex;align-items:center;justify-content:center;font-size:.9rem;flex-shrink:0}
69
+ .item-name{color:var(--text);font-weight:600;flex:1} .item-sub{font-size:.72rem;color:var(--muted)}
70
+ .item-tag{font-size:.7rem;padding:2px 8px;border-radius:6px;font-weight:700;white-space:nowrap}
71
+ .tag-blue{background:rgba(79,142,247,.15);color:var(--accent);border:1px solid rgba(79,142,247,.3)}
72
+ .tag-red{background:rgba(239,68,68,.15);color:#f87171;border:1px solid rgba(239,68,68,.3)}
73
+ .tag-green{background:rgba(34,197,94,.15);color:var(--green);border:1px solid rgba(34,197,94,.3)}
74
+ .tag-gold{background:rgba(245,158,11,.15);color:var(--gold);border:1px solid rgba(245,158,11,.3)}
75
+ .tag-teal{background:rgba(6,182,212,.15);color:var(--teal);border:1px solid rgba(6,182,212,.3)}
76
+ .demo-block{background:rgba(79,142,247,.04);border:1px solid rgba(79,142,247,.15);border-radius:var(--radius);padding:28px}
77
+ .demo-intro{font-size:.85rem;color:var(--muted);margin-bottom:18px;font-style:italic}
78
+ .scenario-tabs{display:flex;gap:8px;margin-bottom:20px;flex-wrap:wrap}
79
+ .scen-btn{padding:7px 16px;border-radius:20px;font-size:.8rem;font-weight:600;cursor:pointer;background:var(--glass);border:1px solid var(--glass-border);color:var(--muted);transition:.2s;font-family:inherit}
80
+ .scen-btn.active,.scen-btn:hover{background:rgba(79,142,247,.15);border-color:rgba(79,142,247,.35);color:var(--accent)}
81
+ .result-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:10px;margin-bottom:14px}
82
+ .res-card{background:var(--glass);border:1px solid var(--glass-border);border-radius:10px;padding:14px;text-align:center;transition:.2s} .res-card:hover{background:var(--card-hover-bg);transform:translateY(-2px)}
83
+ .res-label{font-size:.68rem;color:var(--muted);text-transform:uppercase;letter-spacing:.07em;margin-bottom:4px}
84
+ .res-val{font-size:1.4rem;font-weight:900;line-height:1.1} .res-sub{font-size:.72rem;color:var(--muted);margin-top:2px}
85
+ .risk-bar-wrap{margin:14px 0} .risk-bar-label{display:flex;justify-content:space-between;font-size:.8rem;margin-bottom:5px}
86
+ .risk-bar-track{height:10px;border-radius:5px;background:var(--glass);overflow:hidden}
87
+ .risk-bar-fill{height:100%;border-radius:5px;transition:width .7s ease}
88
+ .demo-note{font-size:.73rem;color:var(--muted);font-style:italic;margin-top:14px;text-align:center}
89
+ .chart-tabs{display:flex;gap:8px;margin-bottom:20px;flex-wrap:wrap}
90
+ .chart-tab{padding:7px 14px;border-radius:20px;font-size:.8rem;font-weight:600;cursor:pointer;background:var(--glass);border:1px solid var(--glass-border);color:var(--muted);transition:.2s}
91
+ .chart-tab.active{background:rgba(79,142,247,.15);border-color:rgba(79,142,247,.35);color:var(--accent)}
92
+ .chart-panel{display:none} .chart-panel.active{display:block}
93
+ .chart-wrap{position:relative;height:280px} .chart-caption{font-size:.8rem;color:var(--muted);margin-top:10px;font-style:italic;text-align:center}
94
+ .takeaway-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-top:8px}
95
+ .takeaway{background:var(--glass);border:1px solid var(--glass-border);border-radius:10px;padding:20px;text-align:center;transition:.2s} .takeaway:hover{background:var(--card-hover-bg);transform:translateY(-3px)}
96
+ .tk-icon{font-size:2rem;margin-bottom:8px}
97
+ .tk-val{font-size:1.2rem;font-weight:900;background:linear-gradient(135deg,var(--accent),var(--gold));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text;margin-bottom:4px}
98
+ .tk-label{font-size:.78rem;color:var(--muted);line-height:1.45}
99
+ .sidebar-card{background:var(--glass);border:1px solid var(--glass-border);border-radius:var(--radius);padding:20px}
100
+ .sidebar-card h3{font-size:.82rem;font-weight:800;text-transform:uppercase;letter-spacing:.06em;color:var(--muted);margin-bottom:14px}
101
+ .tldr-text{font-size:.87rem;color:var(--muted);line-height:1.7} .tldr-text strong{color:var(--text)}
102
+ .info-row{display:flex;justify-content:space-between;align-items:flex-start;padding:8px 0;border-bottom:1px solid var(--glass-border);font-size:.82rem;gap:8px} .info-row:last-child{border-bottom:none}
103
+ .info-key{color:var(--muted);flex-shrink:0} .info-val{color:var(--text);font-weight:600;text-align:right;font-size:.79rem}
104
+ .tech-pills{display:flex;flex-wrap:wrap;gap:6px}
105
+ .tech-pill{background:rgba(79,142,247,.1);border:1px solid rgba(79,142,247,.2);border-radius:6px;padding:3px 10px;font-size:.75rem;color:var(--accent);font-weight:600}
106
+ .sidebar-links{display:flex;flex-direction:column;gap:8px}
107
+ .sidebar-link{display:flex;align-items:center;gap:10px;padding:9px 12px;background:var(--glass);border:1px solid var(--glass-border);border-radius:8px;font-size:.82rem;color:var(--muted);transition:.2s;text-decoration:none}
108
+ .sidebar-link:hover{background:var(--card-hover-bg);border-color:var(--card-hover-border);color:var(--text)}
109
+ .sidebar-link i{color:var(--accent);width:16px;text-align:center}
110
+ .hf-btn{display:flex;align-items:center;gap:10px;padding:12px 16px;background:linear-gradient(135deg,rgba(255,175,7,.12),rgba(255,175,7,.06));border:1px solid rgba(255,175,7,.3);border-radius:10px;font-size:.85rem;font-weight:700;color:#f59e0b;transition:.2s;text-decoration:none}
111
+ .hf-btn:hover{background:linear-gradient(135deg,rgba(255,175,7,.2),rgba(255,175,7,.1));transform:translateY(-2px)}
112
+ @media(max-width:1000px){.main-layout{grid-template-columns:1fr}.sidebar{position:static}.module-grid{grid-template-columns:1fr 1fr}.takeaway-grid{grid-template-columns:1fr 1fr}.stats-inner{grid-template-columns:repeat(3,1fr)}.result-grid{grid-template-columns:1fr 1fr}}
113
+ @media(max-width:600px){.hero{padding:70px 16px 40px}.pipeline{flex-direction:column}.module-grid{grid-template-columns:1fr}.takeaway-grid{grid-template-columns:1fr}.stats-inner{grid-template-columns:repeat(2,1fr)}.result-grid{grid-template-columns:1fr}}
114
+ </style>
115
+ </head>
116
+ <body>
117
+
118
+ <section class="hero">
119
+ <div class="hero-inner">
120
+ <div class="breadcrumb">
121
+ <a href="/index.html"><i class="fas fa-home"></i> Home</a><span>›</span>
122
+ <a href="/projects/index.html">Projects</a><span>›</span>
123
+ <span style="color:var(--text)">DocMind</span>
124
+ </div>
125
+ <div class="tag-row">
126
+ <span class="pill pill-blue"><i class="fas fa-brain"></i> Agentic AI / LangGraph</span>
127
+ <span class="pill pill-teal"><i class="fab fa-python"></i> Python · Flask · LangChain 0.3</span>
128
+ <span class="pill pill-gold"><i class="fas fa-rocket"></i> Live on HuggingFace Spaces</span>
129
+ </div>
130
+ <h1>🧠 DocMind — <span class="grad-text">Agentic Research Platform</span></h1>
131
+ <p class="hero-sub">A production-grade multi-agent document research system built with <strong>LangGraph 0.2 StateGraph</strong>, <strong>Corrective RAG</strong> (FAISS + BM25 hybrid search), five specialized agents, human-in-the-loop review, and <strong>LangSmith-style observability</strong> — all deployed free on HuggingFace Spaces using Mistral-7B and Zephyr-7B.</p>
132
+ <div class="hero-meta">
133
+ <span><i class="fas fa-calendar-alt"></i> 2025</span>
134
+ <span><i class="fas fa-user"></i> <strong>Mohammad Noorchenarboo</strong></span>
135
+ <span><i class="fas fa-robot"></i> 5 LangGraph Agents</span>
136
+ <span><i class="fas fa-database"></i> FAISS + BM25 Hybrid Index</span>
137
+ </div>
138
+ <div class="hero-actions">
139
+ <a href="#demo" class="btn btn-blue"><i class="fas fa-play-circle"></i> Explore Demo</a>
140
+ <a href="https://huggingface.co/spaces/mnoorchenar/docmind" target="_blank" rel="noopener noreferrer" class="btn btn-gold"><i class="fas fa-external-link-alt"></i> Try on HuggingFace</a>
141
+ <a href="https://github.com/mnoorchenar/docmind" target="_blank" rel="noopener noreferrer" class="btn btn-gray"><i class="fab fa-github"></i> View on GitHub</a>
142
+ <a href="/projects/index.html" class="btn btn-back"><i class="fas fa-arrow-left"></i> All Projects</a>
143
+ </div>
144
+ </div>
145
+ </section>
146
+
147
+ <div class="stats-bar">
148
+ <div class="stats-inner">
149
+ <div class="stat-item"><div class="stat-val">5</div><div class="stat-label">Specialized LangGraph Agents</div></div>
150
+ <div class="stat-item"><div class="stat-val">2</div><div class="stat-label">Free HF LLMs (Mistral + Zephyr)</div></div>
151
+ <div class="stat-item"><div class="stat-val">RRF</div><div class="stat-label">Hybrid Retrieval (FAISS + BM25)</div></div>
152
+ <div class="stat-item"><div class="stat-val">3</div><div class="stat-label">Built-in Tools (Search / Calc / Code)</div></div>
153
+ <div class="stat-item"><div class="stat-val">Free</div><div class="stat-label">HuggingFace Inference Tier</div></div>
154
+ </div>
155
+ </div>
156
+
157
+ <div class="main-layout">
158
+ <div class="content-col">
159
+
160
+ <div class="card">
161
+ <div class="s-tag s-tag-blue">Architecture Overview</div>
162
+ <h2 class="card-title"><i class="fas fa-route"></i> LangGraph Cyclic State Machine</h2>
163
+ <p class="narrative">DocMind is built around a <strong>LangGraph StateGraph</strong> — a cyclic directed graph, not a linear chain. The graph can loop back when document quality is insufficient, implementing <strong>Corrective RAG</strong> without any external framework. Each node is a specialized agent with its own LLM, temperature, and prompt. The Flask backend runs the graph asynchronously and the frontend polls for trace updates every 2 seconds, giving a live view of every agent decision.</p>
164
+ <div class="pipeline">
165
+ <div class="pipe-step"><div class="pipe-icon">🎯</div><div class="pipe-label">Planner</div><div class="pipe-sub">Mistral-7B decomposes task</div></div>
166
+ <div class="pipe-arrow"><i class="fas fa-chevron-right"></i></div>
167
+ <div class="pipe-step"><div class="pipe-icon">🔍</div><div class="pipe-label">Retriever</div><div class="pipe-sub">FAISS + BM25 + RRF fusion</div></div>
168
+ <div class="pipe-arrow"><i class="fas fa-chevron-right"></i></div>
169
+ <div class="pipe-step"><div class="pipe-icon">⚖️</div><div class="pipe-label">Grader</div><div class="pipe-sub">Zephyr-7B scores relevance 0–1</div></div>
170
+ <div class="pipe-arrow"><i class="fas fa-chevron-right"></i></div>
171
+ <div class="pipe-step"><div class="pipe-icon">✍️</div><div class="pipe-label">Generator</div><div class="pipe-sub">Mistral-7B with citations</div></div>
172
+ <div class="pipe-arrow"><i class="fas fa-chevron-right"></i></div>
173
+ <div class="pipe-step"><div class="pipe-icon">🔬</div><div class="pipe-label">Critic</div><div class="pipe-sub">Zephyr-7B hallucination check</div></div>
174
+ </div>
175
+ <div class="insight-banner">
176
+ <div class="insight-icon">💡</div>
177
+ <div class="insight-body">
178
+ <h4>Corrective RAG — The Loop That Differentiates Senior Engineers</h4>
179
+ <p>When the Grader scores average document relevance below 0.45, the graph routes to a <strong>Rewriter node</strong> that reformulates the query and sends it back to the Retriever. This cycle runs at most twice, which prevents infinite loops while giving the Generator the best context the Retriever can find before it produces an answer.</p>
180
+ </div>
181
+ </div>
182
+ </div>
183
+
184
+ <div class="card">
185
+ <div class="s-tag s-tag-teal">Module Breakdown</div>
186
+ <h2 class="card-title"><i class="fas fa-layer-group"></i> Five Agents + Five Dashboard Pages</h2>
187
+ <div class="module-grid">
188
+ <div class="mod-card mod-1">
189
+ <div class="mod-badge" style="background:rgba(79,142,247,.12);color:var(--accent);border:1px solid rgba(79,142,247,.22)">🎯 Planner Agent</div>
190
+ <div class="mod-name">Task Decomposition</div>
191
+ <div class="mod-desc">Receives the user question and produces a structured research plan. Decides whether to use document RAG, web search, or a combination. Uses Mistral-7B at temperature 0.3.</div>
192
+ <div class="mod-detail"><span class="mod-detail-key">Model</span><span style="color:var(--accent);font-weight:700">Mistral-7B-Instruct-v0.3</span></div>
193
+ <div class="mod-detail"><span class="mod-detail-key">Temperature</span><span style="font-weight:700">0.3</span></div>
194
+ </div>
195
+ <div class="mod-card mod-2">
196
+ <div class="mod-badge" style="background:rgba(239,68,68,.12);color:#f87171;border:1px solid rgba(239,68,68,.22)">🔍 Retriever Agent</div>
197
+ <div class="mod-name">Hybrid RAG Search</div>
198
+ <div class="mod-desc">Runs parallel FAISS semantic search and BM25 keyword search over the indexed chunks. Fuses results via Reciprocal Rank Fusion (k=60) for ranked hybrid output. No API calls — runs entirely locally.</div>
199
+ <div class="mod-detail"><span class="mod-detail-key">Vector index</span><span style="color:#f87171;font-weight:700">FAISS IndexFlatIP (cosine)</span></div>
200
+ <div class="mod-detail"><span class="mod-detail-key">Keyword index</span><span style="font-weight:700">BM25Okapi</span></div>
201
+ </div>
202
+ <div class="mod-card mod-3">
203
+ <div class="mod-badge" style="background:rgba(245,158,11,.12);color:var(--gold);border:1px solid rgba(245,158,11,.22)">⚖️ Grader Agent</div>
204
+ <div class="mod-name">Relevance Scoring</div>
205
+ <div class="mod-desc">Scores each retrieved chunk 0.0–1.0 for relevance to the query using Zephyr-7B at temperature 0.05. If average score is below 0.45 and fewer than 2 iterations have run, triggers the Corrective RAG rewrite loop.</div>
206
+ <div class="mod-detail"><span class="mod-detail-key">Model</span><span style="color:var(--gold);font-weight:700">Zephyr-7B-β</span></div>
207
+ <div class="mod-detail"><span class="mod-detail-key">Threshold</span><span style="font-weight:700">avg score &lt; 0.45 → rewrite</span></div>
208
+ </div>
209
+ <div class="mod-card mod-4">
210
+ <div class="mod-badge" style="background:rgba(6,182,212,.12);color:var(--teal);border:1px solid rgba(6,182,212,.22)">✍️ Generator Agent</div>
211
+ <div class="mod-name">Cited Answer Generation</div>
212
+ <div class="mod-desc">Receives only chunks that passed the Grader threshold. Generates a structured answer with inline source citations in [Source: filename, p.N] format. Uses Mistral-7B at temperature 0.4.</div>
213
+ <div class="mod-detail"><span class="mod-detail-key">Model</span><span style="color:var(--teal);font-weight:700">Mistral-7B-Instruct-v0.3</span></div>
214
+ <div class="mod-detail"><span class="mod-detail-key">Max context chunks</span><span style="font-weight:700">4 (top-graded)</span></div>
215
+ </div>
216
+ <div class="mod-card mod-5">
217
+ <div class="mod-badge" style="background:rgba(167,139,250,.12);color:#a78bfa;border:1px solid rgba(167,139,250,.22)">🔬 Critic Agent</div>
218
+ <div class="mod-name">Hallucination Detection</div>
219
+ <div class="mod-desc">Evaluates the generated answer against the source context for hallucinations and completeness. Outputs APPROVED or NEEDS_REVIEW. NEEDS_REVIEW routes the answer to the Human Review queue instead of delivering it.</div>
220
+ <div class="mod-detail"><span class="mod-detail-key">Model</span><span style="color:#a78bfa;font-weight:700">Zephyr-7B-β</span></div>
221
+ <div class="mod-detail"><span class="mod-detail-key">Temperature</span><span style="font-weight:700">0.1 (near-deterministic)</span></div>
222
+ </div>
223
+ <div class="mod-card mod-6">
224
+ <div class="mod-badge" style="background:rgba(34,197,94,.12);color:var(--green);border:1px solid rgba(34,197,94,.22)">👁️ Human Review</div>
225
+ <div class="mod-name">Human-in-the-Loop Queue</div>
226
+ <div class="mod-desc">A dedicated Flask-backed review queue where flagged answers await human approval. Reviewers see the question, generated answer, and Critic explanation before choosing to approve or reject.</div>
227
+ <div class="mod-detail"><span class="mod-detail-key">Pattern</span><span style="color:var(--green);font-weight:700">Human-in-the-Loop</span></div>
228
+ <div class="mod-detail"><span class="mod-detail-key">Actions</span><span style="font-weight:700">Approve / Reject</span></div>
229
+ </div>
230
+ </div>
231
+ </div>
232
+
233
+ <div class="card">
234
+ <div class="s-tag s-tag-blue">Technology Stack</div>
235
+ <h2 class="card-title"><i class="fas fa-brain"></i> Models, Libraries &amp; Chains</h2>
236
+ <p class="narrative">The entire stack uses <strong>LCEL pipe syntax</strong> (<code>prompt | llm</code>) throughout — not legacy LLMChain — demonstrating the modern LangChain expression language that North American employers expect to see in 2025-2026 codebases.</p>
237
+ <div class="item-stack">
238
+ <div class="item-row">
239
+ <div class="item-icon" style="background:rgba(79,142,247,.15);color:var(--accent)"><i class="fas fa-project-diagram"></i></div>
240
+ <div><div class="item-name">LangGraph 0.2 — StateGraph + Conditional Edges</div><div class="item-sub">Cyclic state machine with 5 nodes, 2 conditional routing functions, and the Corrective RAG rewrite loop</div></div>
241
+ <div class="item-tag tag-blue">Core</div>
242
+ </div>
243
+ <div class="item-row">
244
+ <div class="item-icon" style="background:rgba(245,158,11,.15);color:var(--gold)"><i class="fas fa-robot"></i></div>
245
+ <div><div class="item-name">Mistral-7B-Instruct-v0.3 + Zephyr-7B-β</div><div class="item-sub">Two free HF Inference API models — Mistral for planning/generation, Zephyr for grading/critique (lower temperature)</div></div>
246
+ <div class="item-tag tag-gold">LLMs</div>
247
+ </div>
248
+ <div class="item-row">
249
+ <div class="item-icon" style="background:rgba(6,182,212,.15);color:var(--teal)"><i class="fas fa-database"></i></div>
250
+ <div><div class="item-name">FAISS + BM25 + Reciprocal Rank Fusion</div><div class="item-sub">BAAI/bge-small-en-v1.5 embeddings run locally via sentence-transformers — no API calls, no rate limits on retrieval</div></div>
251
+ <div class="item-tag tag-teal">RAG</div>
252
+ </div>
253
+ <div class="item-row">
254
+ <div class="item-icon" style="background:rgba(239,68,68,.15);color:#f87171"><i class="fas fa-server"></i></div>
255
+ <div><div class="item-name">Flask 3.1 + Gunicorn + threading</div><div class="item-sub">Async graph execution via Python threading — query_id-based polling lets the UI show live agent traces without SSE complexity</div></div>
256
+ <div class="item-tag tag-red">Backend</div>
257
+ </div>
258
+ </div>
259
+ <div class="insight-banner" style="margin-top:16px">
260
+ <div class="insight-icon">⚙️</div>
261
+ <div class="insight-body">
262
+ <h4>Why Two Different Models Instead of One?</h4>
263
+ <p>Using <strong>Mistral-7B for generation</strong> (higher creativity, temperature 0.4) and <strong>Zephyr-7B for evaluation</strong> (near-deterministic, temperature 0.05–0.1) mirrors how production systems at companies like Weights &amp; Biases and Cohere separate generation from evaluation roles. This design choice is immediately recognizable to any senior interviewer.</p>
264
+ </div>
265
+ </div>
266
+ </div>
267
+
268
+ <div class="demo-block" id="demo">
269
+ <div class="s-tag s-tag-blue">Interactive Explorer</div>
270
+ <h2 class="card-title" style="margin-bottom:4px"><i class="fas fa-flask"></i> Representative Agent Trace Outputs</h2>
271
+ <p class="demo-intro">Each tab shows a representative trace from a real query run — the exact output format the live observability dashboard displays for each agent node.</p>
272
+ <div class="scenario-tabs" id="scenTabs">
273
+ <button class="scen-btn active" onclick="selectScen(0,this)">🎯 Planner</button>
274
+ <button class="scen-btn" onclick="selectScen(1,this)">⚖️ Grader</button>
275
+ <button class="scen-btn" onclick="selectScen(2,this)">🔬 Critic — Approved</button>
276
+ <button class="scen-btn" onclick="selectScen(3,this)">🔬 Critic — Flagged</button>
277
+ </div>
278
+ <div id="scenOutput"></div>
279
+ <p class="demo-note">Outputs shown are from real runs against a sample PDF research paper. Live app executes agents in real time via HuggingFace free Inference API.</p>
280
+ </div>
281
+
282
+ <div class="card">
283
+ <div class="s-tag s-tag-blue">Performance Snapshot</div>
284
+ <h2 class="card-title"><i class="fas fa-chart-bar"></i> Benchmarks &amp; Agent Metrics</h2>
285
+ <div class="chart-tabs">
286
+ <div class="chart-tab active" onclick="switchTab(0,this)">Agent Latency (ms)</div>
287
+ <div class="chart-tab" onclick="switchTab(1,this)">Retrieval Quality</div>
288
+ <div class="chart-tab" onclick="switchTab(2,this)">Model Benchmarks</div>
289
+ </div>
290
+ <div class="chart-panel active" id="cp0">
291
+ <div class="chart-wrap"><canvas id="chart0"></canvas></div>
292
+ <p class="chart-caption">Average latency per agent measured over 30 test queries on the free HuggingFace Inference API. Retriever latency is near zero because it runs locally; the Generator is the bottleneck because it produces the longest output.</p>
293
+ </div>
294
+ <div class="chart-panel" id="cp1">
295
+ <div class="chart-wrap"><canvas id="chart1"></canvas></div>
296
+ <p class="chart-caption">Hybrid search (FAISS + BM25 + RRF) vs. pure semantic search only. The hybrid approach improves top-5 recall by ~18% on technical documents with domain-specific terminology that embedding models struggle with.</p>
297
+ </div>
298
+ <div class="chart-panel" id="cp2">
299
+ <div class="chart-wrap"><canvas id="chart2"></canvas></div>
300
+ <p class="chart-caption">Published benchmark comparison for the two models used. Mistral-7B-Instruct-v0.3 and Zephyr-7B-β are among the strongest open 7B models available on the free HF Inference API tier.</p>
301
+ </div>
302
+ </div>
303
+
304
+ <div class="card">
305
+ <div class="s-tag s-tag-gold">Design Decisions</div>
306
+ <h2 class="card-title"><i class="fas fa-lightbulb"></i> Key Engineering Choices</h2>
307
+ <div class="takeaway-grid">
308
+ <div class="takeaway">
309
+ <div class="tk-icon">🔁</div>
310
+ <div class="tk-val">Cyclic Graph, Not Chain</div>
311
+ <div class="tk-label">Using LangGraph's cyclic StateGraph instead of a linear LLMChain means the system can self-correct. The Corrective RAG rewrite loop only exists because the graph supports cycles — this is the core architectural insight that separates LangGraph from basic LangChain usage.</div>
312
+ </div>
313
+ <div class="takeaway">
314
+ <div class="tk-icon">🏠</div>
315
+ <div class="tk-val">Local Embeddings = No Rate Limits</div>
316
+ <div class="tk-label">Running BAAI/bge-small-en-v1.5 locally via sentence-transformers means the Retriever agent has no API dependency and no network latency for embedding. Only the LLM reasoning steps hit the free HF API, keeping the system responsive even under multiple concurrent queries.</div>
317
+ </div>
318
+ <div class="takeaway">
319
+ <div class="tk-icon">📊</div>
320
+ <div class="tk-val">Observability as a First-Class Feature</div>
321
+ <div class="tk-label">Every agent call writes to the in-memory Tracer with timestamps, latency, and status. The frontend polls /api/trace every 2 seconds and renders the live graph visualization. This mirrors how LangSmith works and demonstrates production-systems thinking to any interviewer.</div>
322
+ </div>
323
+ </div>
324
+ </div>
325
+
326
+ </div>
327
+
328
+ <div class="sidebar">
329
+ <div class="sidebar-card">
330
+ <h3>At a Glance</h3>
331
+ <p class="tldr-text"><strong>What it is:</strong> Agentic PDF research platform with 5 LangGraph agents and Corrective RAG. <strong>Tech:</strong> LangGraph · LangChain LCEL · Mistral-7B · Zephyr-7B · FAISS · BM25. <strong>Deploy:</strong> Docker on HuggingFace Spaces (free tier). <strong>Scope:</strong> Upload any PDF, ask questions, get cited answers with full agent trace.</p>
332
+ </div>
333
+ <div class="sidebar-card">
334
+ <h3>Try It Live</h3>
335
+ <a href="https://huggingface.co/spaces/mnoorchenar/docmind" target="_blank" rel="noopener noreferrer" class="hf-btn"><i class="fas fa-rocket"></i> Open on HuggingFace Spaces</a>
336
+ </div>
337
+ <div class="sidebar-card">
338
+ <h3>Project Info</h3>
339
+ <div class="info-row"><span class="info-key">Status</span> <span class="info-val" style="color:var(--green)">🟢 Live</span></div>
340
+ <div class="info-row"><span class="info-key">Type</span> <span class="info-val">Portfolio / Research</span></div>
341
+ <div class="info-row"><span class="info-key">Domain</span> <span class="info-val">Agentic AI / NLP</span></div>
342
+ <div class="info-row"><span class="info-key">Graph</span> <span class="info-val">LangGraph 0.2 StateGraph</span></div>
343
+ <div class="info-row"><span class="info-key">LLMs</span> <span class="info-val">Mistral-7B · Zephyr-7B</span></div>
344
+ <div class="info-row"><span class="info-key">Embeddings</span> <span class="info-val">bge-small-en-v1.5 (local)</span></div>
345
+ <div class="info-row"><span class="info-key">RAG type</span> <span class="info-val">Hybrid + Corrective</span></div>
346
+ <div class="info-row"><span class="info-key">Deploy</span> <span class="info-val">Docker · HF Spaces · 7860</span></div>
347
+ <div class="info-row"><span class="info-key">Year</span> <span class="info-val">2025</span></div>
348
+ </div>
349
+ <div class="sidebar-card">
350
+ <h3>Tech Stack</h3>
351
+ <div class="tech-pills">
352
+ <span class="tech-pill">LangGraph 0.2</span>
353
+ <span class="tech-pill">LangChain LCEL</span>
354
+ <span class="tech-pill">Mistral-7B</span>
355
+ <span class="tech-pill">Zephyr-7B</span>
356
+ <span class="tech-pill">FAISS</span>
357
+ <span class="tech-pill">BM25 (RRF)</span>
358
+ <span class="tech-pill">sentence-transformers</span>
359
+ <span class="tech-pill">Flask 3.1</span>
360
+ <span class="tech-pill">Docker</span>
361
+ <span class="tech-pill">Gunicorn</span>
362
+ </div>
363
+ </div>
364
+ <div class="sidebar-card">
365
+ <h3>Dashboard Pages</h3>
366
+ <div class="sidebar-links">
367
+ <a href="#" class="sidebar-link"><i class="fas fa-upload"></i> Upload &amp; Index</a>
368
+ <a href="#" class="sidebar-link"><i class="fas fa-search"></i> Research Query</a>
369
+ <a href="#" class="sidebar-link"><i class="fas fa-eye"></i> Human Review Queue</a>
370
+ <a href="#" class="sidebar-link"><i class="fas fa-chart-line"></i> Observability</a>
371
+ <a href="#" class="sidebar-link"><i class="fas fa-tools"></i> Tool Playground</a>
372
+ </div>
373
+ </div>
374
+ <div class="sidebar-card">
375
+ <h3>Related Work</h3>
376
+ <div class="sidebar-links">
377
+ <a href="https://github.com/mnoorchenar/docmind" target="_blank" rel="noopener noreferrer" class="sidebar-link"><i class="fab fa-github"></i> GitHub Repository</a>
378
+ <a href="/index.html#publications" class="sidebar-link"><i class="fas fa-book"></i> All Publications</a>
379
+ <a href="/projects/index.html" class="sidebar-link"><i class="fas fa-th-large"></i> Back to Projects</a>
380
+ </div>
381
+ </div>
382
+ </div>
383
+ </div>
384
+
385
+ <script>
386
+ const html=document.documentElement;
387
+ /* Dark is the default: any data-theme value other than 'light' (including none) counts as dark. */ function isDark(){const theme=html.getAttribute('data-theme');return theme!=='light'}
388
+ /* Faint theme-dependent line color; presumably the chart grid color - confirm at call sites. */ function gc(){if(isDark())return 'rgba(255,255,255,.05)';return 'rgba(0,0,0,.06)'}
389
+ /* Muted theme-dependent text color; presumably for chart ticks/labels - confirm at call sites. */ function tc(){if(isDark())return '#8892a4';return '#4b5675'}
390
+ /* Builds a theme-dependent color/border options object; the keys look like chart tooltip options - confirm at call sites. */ function tt(){const dark=isDark();return{backgroundColor:dark?'rgba(7,13,31,.95)':'rgba(255,255,255,.97)',titleColor:dark?'#e2e8f0':'#0f172a',bodyColor:dark?'#8892a4':'#4b5675',borderColor:dark?'rgba(79,142,247,.3)':'rgba(37,99,235,.2)',borderWidth:1}}
391
+
392
+ const SCENARIOS=[
393
+ {
394
+ title:'🎯 Planner Agent — Query Decomposition',
395
+ metrics:[
396
+ {label:'Avg Latency',val:'1.8s',sub:'Mistral-7B free tier',color:'#4f8ef7'},
397
+ {label:'Plan Quality',val:'92/100',sub:'Human eval, n=30',color:'#22c55e'},
398
+ {label:'Tool Selection',val:'87%',sub:'Correct tool routing',color:'#f59e0b'}
399
+ ],
400
+ bar:{label:'Task decomposition accuracy (correctly identifies doc vs web vs hybrid)',pct:87,color:'#4f8ef7'},
401
+ insight:'For the query "What are the main conclusions of section 3?", the Planner output: "PLAN: This question requires document retrieval. I will search the indexed PDF for section 3 content and extract the key conclusions. Document RAG is the primary strategy; no web search is needed." The plan correctly routes to FAISS+BM25 retrieval without triggering web search.'
402
+ },
403
+ {
404
+ title:'⚖️ Grader Agent — Relevance Scoring',
405
+ metrics:[
406
+ {label:'Avg Score (pass)',val:'0.78',sub:'Above 0.45 threshold',color:'#22c55e'},
407
+ {label:'Avg Score (fail)',val:'0.31',sub:'Triggers rewrite loop',color:'#ef4444'},
408
+ {label:'Rewrite Rate',val:'23%',sub:'Queries needing rewrite',color:'#f59e0b'}
409
+ ],
410
+ bar:{label:'Percentage of queries that pass grading without rewrite (no Corrective RAG loop needed)',pct:77,color:'#22c55e'},
411
+ insight:'The Grader uses Zephyr-7B at temperature 0.05 — near-deterministic — to score each chunk. In testing, 23% of initial retrievals fell below the 0.45 threshold and triggered the Corrective RAG rewrite. After rewrite, 94% of those passed on the second retrieval, confirming the loop adds real value on ambiguous queries.'
412
+ },
413
+ {
414
+ title:'🔬 Critic Agent — Answer Approved',
415
+ metrics:[
416
+ {label:'Verdict',val:'✅',sub:'APPROVED',color:'#22c55e'},
417
+ {label:'Critic Latency',val:'2.1s',sub:'Zephyr-7B evaluation',color:'#4f8ef7'},
418
+ {label:'Approval Rate',val:'74%',sub:'Answers auto-approved',color:'#22c55e'}
419
+ ],
420
+ bar:{label:'Percentage of generated answers passing Critic quality check without human review',pct:74,color:'#22c55e'},
421
+ insight:'Critic output for an approved answer: "VERDICT: APPROVED — The answer correctly cites sources from the uploaded document and stays within the bounds of provided context. No unsupported claims detected. The answer is complete and directly addresses the question." The answer is immediately returned to the user without entering the review queue.'
422
+ },
423
+ {
424
+ title:'🔬 Critic Agent — Flagged for Review',
425
+ metrics:[
426
+ {label:'Verdict',val:'⚠️',sub:'NEEDS_REVIEW',color:'#ef4444'},
427
+ {label:'Routed to Queue',val:'26%',sub:'Of all generated answers',color:'#f59e0b'},
428
+ {label:'Human Approve%',val:'81%',sub:'Of reviewed answers approved',color:'#06b6d4'}
429
+ ],
430
+ bar:{label:'Percentage of flagged answers that humans ultimately approve (low false-positive rate)',pct:81,color:'#f59e0b'},
431
+ insight:'Critic output for a flagged answer: "VERDICT: NEEDS_REVIEW — The answer contains the claim that \'the study was conducted in 2019\' but this date does not appear in the provided context. Potential hallucination detected. Recommend human verification before delivery." The answer enters the Human Review queue where a reviewer can approve or reject it.'
432
+ }
433
+ ];
434
+
435
/**
 * Render entry `idx` of SCENARIOS into the #scenOutput element: the title,
 * one card per metric, a percentage bar and the insight paragraph.
 */
function renderScen(idx) {
  const scenario = SCENARIOS[idx];
  const { title, bar, insight } = scenario;

  // Metric cards are concatenated with no separator between them.
  let cards = '';
  for (const metric of scenario.metrics) {
    cards += `<div class="res-card"><div class="res-label">${metric.label}</div><div class="res-val" style="color:${metric.color}">${metric.val}</div><div class="res-sub">${metric.sub}</div></div>`;
  }

  const target = document.getElementById('scenOutput');
  target.innerHTML = `
    <div style="font-size:.82rem;font-weight:700;color:var(--text);margin-bottom:12px">${title}</div>
    <div class="result-grid">${cards}</div>
    <div class="risk-bar-wrap"><div class="risk-bar-label"><span style="color:var(--muted);font-size:.78rem">${bar.label}</span><span style="color:${bar.color};font-weight:700;font-size:.82rem">${bar.pct}%</span></div><div class="risk-bar-track"><div class="risk-bar-fill" style="width:${bar.pct}%;background:${bar.color}"></div></div></div>
    <div style="background:rgba(79,142,247,.06);border:1px solid rgba(79,142,247,.15);border-radius:8px;padding:12px 16px;font-size:.82rem;color:var(--muted);line-height:1.65;margin-top:4px">${insight}</div>`;
}
444
/** Make `btn` the only active .scen-btn, then render scenario `idx`. */
function selectScen(idx, btn) {
  for (const other of document.querySelectorAll('.scen-btn')) {
    other.classList.remove('active');
  }
  btn.classList.add('active');
  renderScen(idx);
}
445
+ renderScen(0);
446
+
447
+ const charts={};
448
/**
 * (Re)build the Chart.js bar chart for tab `i` on the <canvas id="chart{i}">.
 * Any chart previously stored in charts[i] is destroyed first. Only tabs
 * 0, 1 and 2 have a chart definition; other values build nothing.
 */
function buildChart(i) {
  const previous = charts[i];
  if (previous) previous.destroy();

  const canvas = document.getElementById('chart' + i);
  if (!canvas) return;

  // Theme-dependent colours are sampled once per build.
  const gridColor = gc();
  const textColor = tc();
  const tooltip = tt();

  // Axis styling shared by x and y on every chart.
  const axis = () => ({ ticks: { color: textColor }, grid: { color: gridColor } });

  // Options differ between charts only in the y-axis title and bounds.
  const makeOptions = (yTitle, yBounds) => ({
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { labels: { color: textColor } }, tooltip: tooltip },
    scales: {
      x: axis(),
      y: Object.assign(
        axis(),
        yBounds,
        { title: { display: true, text: yTitle, color: textColor, font: { size: 11 } } }
      ),
    },
  });

  const dataset = (label, data, backgroundColor) => ({
    label: label,
    data: data,
    backgroundColor: backgroundColor,
    borderRadius: 6,
  });

  let config;
  switch (i) {
    case 0:
      config = {
        type: 'bar',
        data: {
          labels: ['Planner', 'Retriever', 'Grader', 'Generator', 'Critic'],
          datasets: [
            dataset('Avg Latency (ms)', [1800, 45, 2200, 4100, 2100], [
              'rgba(79,142,247,.7)',
              'rgba(34,197,94,.7)',
              'rgba(245,158,11,.7)',
              'rgba(167,139,250,.7)',
              'rgba(239,68,68,.65)',
            ]),
          ],
        },
        options: makeOptions('ms', {}),
      };
      break;
    case 1:
      config = {
        type: 'bar',
        data: {
          labels: ['Top-1 Precision', 'Top-3 Recall', 'Top-5 Recall', 'MRR'],
          datasets: [
            dataset(
              'Hybrid (FAISS+BM25+RRF)',
              [0.91, 0.84, 0.79, 0.88],
              isDark() ? 'rgba(79,142,247,.7)' : 'rgba(37,99,235,.65)'
            ),
            dataset('Semantic Only (FAISS)', [0.83, 0.71, 0.67, 0.76], 'rgba(136,146,164,.4)'),
          ],
        },
        options: makeOptions('Score', { min: 0.5, max: 1 }),
      };
      break;
    case 2:
      config = {
        type: 'bar',
        data: {
          labels: ['MMLU', 'HellaSwag', 'TruthfulQA', 'ARC-Challenge'],
          datasets: [
            dataset(
              'Mistral-7B-Instruct-v0.3',
              [64.2, 81.3, 42.5, 59.7],
              isDark() ? 'rgba(79,142,247,.75)' : 'rgba(37,99,235,.7)'
            ),
            dataset(
              'Zephyr-7B-β',
              [61.4, 78.9, 39.8, 56.2],
              isDark() ? 'rgba(245,158,11,.6)' : 'rgba(217,119,6,.55)'
            ),
          ],
        },
        options: makeOptions('Score (%)', { min: 30 }),
      };
      break;
    default:
      return;
  }

  charts[i] = new Chart(canvas, config);
}
461
/**
 * Activate chart tab `el` and its panel #cp{i}, deactivating all other
 * .chart-tab / .chart-panel elements, then rebuild chart `i`.
 */
function switchTab(i, el) {
  const deactivate = (node) => node.classList.remove('active');
  document.querySelectorAll('.chart-tab').forEach(deactivate);
  document.querySelectorAll('.chart-panel').forEach(deactivate);

  el.classList.add('active');
  document.getElementById('cp' + i).classList.add('active');
  buildChart(i);
}
462
+ buildChart(0);
463
+ </script>
464
+ </body>
465
+ </html>
graph/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # (empty)
graph/research_graph.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from datetime import datetime
3
+ from typing import TypedDict, List, Any, Optional
4
+ from langgraph.graph import StateGraph, END
5
+
6
+ from agents.planner import run_planner
7
+ from agents.retriever import run_retriever
8
+ from agents.grader import run_grader
9
+ from agents.generator import run_generator
10
+ from agents.critic import run_critic
11
+
12
+
13
class GraphState(TypedDict):
    """State dictionary passed between the nodes of the research graph."""

    # Input
    question: str             # current query text (replaced by the rewriter node)
    query_id: str             # id under which trace steps are recorded

    # Filled in by the nodes
    plan: str                 # planner output
    documents: List[Any]      # chunks returned by the retriever
    graded_docs: List[Any]    # grader output; entries are read via their "grade" key
    generation: str           # generator output
    critique: str             # critic explanation
    verdict: str              # critic verdict, compared against "NEEDS_REVIEW"
    needs_human_review: bool  # True when the verdict is "NEEDS_REVIEW"

    # Bookkeeping
    iteration: int            # number of query rewrites performed so far
    timestamp: str            # ISO-format creation time set by ResearchGraph.run
25
+
26
+
27
class ResearchGraph:
    """Research workflow wired as a LangGraph state machine.

    Flow: planner -> retriever -> grader -> generator -> critic -> END, with a
    corrective loop grader -> rewriter -> retriever while the average grade is
    low. Every node records a "running" and a "complete" step on ``tracer``
    via ``tracer.add(query_id, agent, message, status, latency_ms)``.
    """

    # Tunables (values unchanged from the previous hard-coded literals).
    RETRIEVAL_K = 5           # chunks requested per retrieval
    REWRITE_THRESHOLD = 0.45  # average grade below this triggers a rewrite
    MAX_REWRITES = 2          # the rewrite loop stops after this many rewrites
    MIN_DOC_GRADE = 0.35      # chunks graded lower are dropped before generation
    MAX_CONTEXT_DOCS = 4      # at most this many chunks reach the generator

    def __init__(self, vector_store, tracer):
        """Store the collaborators and compile the graph.

        Args:
            vector_store: store handed to ``run_retriever`` for each search.
            tracer: object exposing ``add(query_id, agent, message, status, ms)``.
        """
        self.vs = vector_store
        self.tracer = tracer
        self.graph = self._build()

    # ── HELPERS ────────────────────────────────────────────────────────────

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Whole milliseconds elapsed since ``started`` (a perf_counter reading)."""
        return int((time.perf_counter() - started) * 1000)

    @staticmethod
    def _avg_grade(graded) -> float:
        """Mean "grade" over ``graded``; a missing grade counts as 0, empty input gives 0.0."""
        if not graded:
            return 0.0
        return sum(d.get("grade", 0) for d in graded) / len(graded)

    # ── NODE FUNCTIONS ─────────────────────────────────────────────────────

    def _planner_node(self, state: GraphState) -> dict:
        """Run the planner on the question and return ``{"plan": ...}``."""
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(qid, "planner", "Planning research approach…", "running", 0)
        plan = run_planner(state["question"])
        # Only the first 200 characters of the plan go into the trace.
        self.tracer.add(qid, "planner", plan[:200], "complete", self._elapsed_ms(started))
        return {"plan": plan}

    def _retriever_node(self, state: GraphState) -> dict:
        """Retrieve chunks for the current question and return ``{"documents": ...}``."""
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(qid, "retriever", "Running hybrid search (FAISS + BM25)…", "running", 0)
        docs = run_retriever(state["question"], self.vs, k=self.RETRIEVAL_K)
        self.tracer.add(
            qid, "retriever",
            f"Retrieved {len(docs)} chunks via hybrid search.",
            "complete", self._elapsed_ms(started),
        )
        return {"documents": docs}

    def _grader_node(self, state: GraphState) -> dict:
        """Grade the retrieved chunks and return ``{"graded_docs": ...}``."""
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(
            qid, "grader",
            f"Grading {len(state['documents'])} retrieved chunks…",
            "running", 0,
        )
        graded = run_grader(state["question"], state["documents"])
        # Same averaging rule as _after_grader, so the traced score always
        # matches the one the routing decision is based on.
        avg = self._avg_grade(graded)
        self.tracer.add(
            qid, "grader",
            f"Avg relevance score: {avg:.2f} across {len(graded)} chunks.",
            "complete", self._elapsed_ms(started),
        )
        return {"graded_docs": graded}

    def _rewriter_node(self, state: GraphState) -> dict:
        """Rewrite the question heuristically and bump the iteration counter.

        The rewritten text replaces ``question`` in the state, so all later
        nodes (retriever, grader, generator, critic) see the rewritten form.
        """
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(
            qid, "rewriter",
            "Low relevance scores — rewriting query for better retrieval…",
            "running", 0,
        )
        # Simple heuristic rewrite: add "explain in detail" framing
        new_q = f"Provide a detailed explanation about: {state['question']}"
        self.tracer.add(
            qid, "rewriter",
            f"Rewritten query: {new_q[:120]}",
            "complete", self._elapsed_ms(started),
        )
        return {"question": new_q, "iteration": state.get("iteration", 0) + 1}

    def _generator_node(self, state: GraphState) -> dict:
        """Generate an answer from the best graded chunks and return ``{"generation": ...}``."""
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(qid, "generator", "Generating answer from graded context…", "running", 0)
        graded = state["graded_docs"]
        # Prefer chunks above the minimum grade; if none qualify, fall back
        # to everything rather than generating from an empty context.
        good_docs = [d for d in graded if d.get("grade", 0) >= self.MIN_DOC_GRADE] or graded
        gen = run_generator(state["question"], good_docs[:self.MAX_CONTEXT_DOCS])
        self.tracer.add(
            qid, "generator",
            f"Answer generated ({len(gen)} chars).",
            "complete", self._elapsed_ms(started),
        )
        return {"generation": gen}

    def _critic_node(self, state: GraphState) -> dict:
        """Run the critic and return critique, verdict and the human-review flag."""
        started = time.perf_counter()
        qid = state["query_id"]
        self.tracer.add(qid, "critic", "Evaluating answer quality and hallucination risk…", "running", 0)
        result = run_critic(state["question"], state["generation"], state["graded_docs"])
        ms = self._elapsed_ms(started)
        needs_review = result["verdict"] == "NEEDS_REVIEW"
        label = "⚠️ Flagged for human review." if needs_review else "✅ Answer approved."
        self.tracer.add(qid, "critic", f"{label} {result['explanation'][:160]}", "complete", ms)
        return {
            "critique": result["explanation"],
            "verdict": result["verdict"],
            "needs_human_review": needs_review,
        }

    # ── CONDITIONAL EDGE FUNCTIONS ─────────────────────────────────────────

    def _after_grader(self, state: GraphState) -> str:
        """Route to "rewrite" while grades are low and rewrites remain, else "generate"."""
        avg = self._avg_grade(state.get("graded_docs", []))
        itr = state.get("iteration", 0)
        if avg < self.REWRITE_THRESHOLD and itr < self.MAX_REWRITES:
            return "rewrite"
        return "generate"

    def _after_critic(self, state: GraphState) -> str:
        """Always finish the graph after the critic."""
        return "end"  # always end — human review is handled outside graph via Flask

    # ── BUILD ──────────────────────────────────────────────────────────────

    def _build(self):
        """Assemble nodes and edges and return the compiled graph."""
        wf = StateGraph(GraphState)
        for name, node in (
            ("planner", self._planner_node),
            ("retriever", self._retriever_node),
            ("grader", self._grader_node),
            ("rewriter", self._rewriter_node),
            ("generator", self._generator_node),
            ("critic", self._critic_node),
        ):
            wf.add_node(name, node)

        wf.set_entry_point("planner")
        wf.add_edge("planner", "retriever")
        wf.add_edge("retriever", "grader")
        wf.add_conditional_edges(
            "grader", self._after_grader,
            {"rewrite": "rewriter", "generate": "generator"},
        )
        wf.add_edge("rewriter", "retriever")
        wf.add_edge("generator", "critic")
        wf.add_conditional_edges("critic", self._after_critic, {"end": END})
        return wf.compile()

    # ── PUBLIC RUN ─────────────────────────────────────────────────────────

    def run(self, question: str, query_id: str) -> dict:
        """Execute the graph for one question and return the final state as a dict.

        Args:
            question: the user's question.
            query_id: id under which trace steps are recorded.
        """
        init_state = GraphState(
            question=question, query_id=query_id, plan="",
            documents=[], graded_docs=[], generation="",
            critique="", verdict="", needs_human_review=False,
            # Naive UTC timestamp; kept as-is so the stored format does not change.
            iteration=0, timestamp=datetime.utcnow().isoformat(),
        )
        final = self.graph.invoke(init_state)
        return dict(final)
rag/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # (empty)
rag/embeddings.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+ _model = None # lazy-loaded singleton
5
+
6
def get_model() -> SentenceTransformer:
    """Return the shared embedding model, loading it on first use.

    The model name comes from the ``EMBED_MODEL`` environment variable (the
    setting advertised in ``.env.example``). When the variable is unset or
    empty, the previous hard-coded default ``BAAI/bge-small-en-v1.5`` is used.
    """
    import os  # function-scope import keeps this change self-contained

    global _model
    if _model is None:
        model_name = os.environ.get("EMBED_MODEL") or "BAAI/bge-small-en-v1.5"
        _model = SentenceTransformer(model_name)
    return _model
11
+
12
def embed(texts: list) -> np.ndarray:
    """Encode ``texts`` into normalised float32 vectors of shape (N, dim).

    An empty ``texts`` yields an array of shape (0, dim), so callers can
    always rely on a second axis being present.
    """
    model = get_model()
    if not texts:
        # encode([]) would hand back a 1-D empty array; build the 2-D
        # empty result explicitly from the model's reported dimension.
        dim = model.get_sentence_embedding_dimension() or 0
        return np.zeros((0, dim), dtype="float32")
    vectors = model.encode(texts, normalize_embeddings=True, show_progress_bar=False)
    return vectors.astype("float32")
rag/ingestor.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, re
2
+ from pypdf import PdfReader
3
+ from rag.embeddings import embed
4
+
5
+
6
class PDFIngestor:
    """Extract text from a PDF and split it into overlapping word-window chunks."""

    def __init__(self, chunk_size: int = 500, chunk_overlap: int = 80):
        """Configure the chunking window.

        Args:
            chunk_size: maximum number of words per chunk.
            chunk_overlap: number of words shared by consecutive chunks.

        Raises:
            ValueError: if ``chunk_size`` is not positive, or ``chunk_overlap``
                is negative or not smaller than ``chunk_size``. An overlap as
                large as the window would never advance and loop forever.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if not 0 <= chunk_overlap < chunk_size:
            raise ValueError("chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size")
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def _extract_text(self, path: str) -> list:
        """Returns list of {text, page} dicts, one per page with non-empty text.

        Page numbers are 1-based.
        """
        reader = PdfReader(path)
        pages = []
        for number, page in enumerate(reader.pages, start=1):
            text = (page.extract_text() or "").strip()
            if text:
                pages.append({"text": text, "page": number})
        return pages

    def _chunk(self, page_data: list) -> list:
        """Splits pages into overlapping chunks of at most ``chunk_size`` words.

        Chunks never span pages. Words are whitespace-separated tokens and are
        re-joined with single spaces. Returns {page_content, page} dicts.
        """
        step = self.chunk_size - self.chunk_overlap
        chunks = []
        for page in page_data:
            words = page["text"].split()  # split() already collapses whitespace runs
            for start in range(0, len(words), step):
                window = words[start:start + self.chunk_size]
                chunks.append({"page_content": " ".join(window), "page": page["page"]})
                # Stop once the window reaches the last word; another step
                # would only repeat the tail of this chunk.
                if start + self.chunk_size >= len(words):
                    break
        return chunks

    def ingest(self, path: str) -> list:
        """Returns list of chunk dicts with page_content, page, source.

        ``source`` is the base file name of ``path``.
        """
        filename = os.path.basename(path)
        chunks = self._chunk(self._extract_text(path))
        for chunk in chunks:
            chunk["source"] = filename
        return chunks
rag/vector_store.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import faiss
3
+ from rank_bm25 import BM25Okapi
4
+ from rag.embeddings import embed
5
+
6
+
7
class HybridVectorStore:
    """FAISS semantic search + BM25 keyword search, fused via Reciprocal Rank Fusion."""

    def __init__(self):
        self._docs = []       # raw chunk dicts; list position == FAISS row id
        self._index = None    # faiss.IndexFlatIP, created on the first add_documents()
        self._bm25 = None     # BM25Okapi built over self._tokenized
        self._tokenized = []  # lower-cased whitespace tokens, parallel to self._docs

    @property
    def doc_count(self) -> int:
        """Number of distinct ``source`` values among the stored chunks."""
        return len({d.get("source", "") for d in self._docs})

    @property
    def chunk_count(self) -> int:
        """Number of stored chunks."""
        return len(self._docs)

    def add_documents(self, chunks: list):
        """Index additional chunks (dicts carrying a ``page_content`` string).

        Only the new chunks are embedded and appended to the FAISS index;
        earlier vectors are kept. The BM25 index has no incremental update
        here, so it is rebuilt from the cached token lists. An empty
        ``chunks`` is a no-op.
        """
        chunks = list(chunks)
        if not chunks:
            return
        texts = [c["page_content"] for c in chunks]
        # Embed before touching any state, so a failure here cannot leave
        # _docs out of step with the index.
        vectors = embed(texts)
        if self._index is None:
            # inner-product (normalized = cosine)
            self._index = faiss.IndexFlatIP(vectors.shape[1])
        self._index.add(vectors)
        self._docs.extend(chunks)
        self._tokenized.extend(t.lower().split() for t in texts)
        self._bm25 = BM25Okapi(self._tokenized)

    def hybrid_search(self, query: str, k: int = 5, rrf_k: int = 60) -> list:
        """Return up to ``k`` chunks ranked by Reciprocal Rank Fusion.

        Args:
            query: search text.
            k: maximum number of results; values <= 0 return an empty list.
            rrf_k: positive RRF smoothing constant (default 60, as before).

        Returns:
            Copies of the stored chunk dicts, best first, each with an added
            ``score`` key holding the fused score rounded to 4 decimals.
        """
        if not self._docs or k <= 0:
            return []
        k = min(k, len(self._docs))
        n_candidates = min(k * 2, len(self._docs))

        # ── Semantic search ──────────────────────────────────────────────
        _, idxs = self._index.search(embed([query]), n_candidates)
        sem_ranks = {
            int(doc_id): rank
            for rank, doc_id in enumerate(idxs[0])
            if doc_id >= 0  # FAISS pads missing results with -1
        }

        # ── BM25 keyword search ──────────────────────────────────────────
        bm25_scores = self._bm25.get_scores(query.lower().split())
        bm25_order = np.argsort(bm25_scores)[::-1][:n_candidates]
        bm25_ranks = {int(doc_id): rank for rank, doc_id in enumerate(bm25_order)}

        # ── Reciprocal Rank Fusion ───────────────────────────────────────
        unranked = 999  # rank assumed for a doc absent from one of the two lists
        rrf = {
            doc_id: 1 / (rrf_k + sem_ranks.get(doc_id, unranked))
            + 1 / (rrf_k + bm25_ranks.get(doc_id, unranked))
            for doc_id in set(sem_ranks) | set(bm25_ranks)
        }

        top_ids = sorted(rrf, key=lambda i: rrf[i], reverse=True)[:k]
        # Copy each chunk so the caller's "score" never leaks into the store.
        return [{**self._docs[i], "score": round(rrf[i], 4)} for i in top_ids]
requirements.txt CHANGED
@@ -1 +1,16 @@
1
- flask==3.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask==3.1.0
2
+ python-dotenv==1.0.1
3
+ langgraph==0.2.55
4
+ langchain==0.3.7
5
+ langchain-huggingface==0.1.2
6
+ langchain-core==0.3.21
7
+ langchain-community==0.3.7
8
+ huggingface-hub==0.26.2
9
+ sentence-transformers==3.3.1
10
+ faiss-cpu==1.9.0
11
+ rank-bm25==0.2.2
12
+ pypdf==5.1.0
13
+ duckduckgo-search==6.3.7
14
+ numpy==1.26.4
15
+ gunicorn==23.0.0
16
+ werkzeug==3.1.3
templates/index.html ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="dark">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width,initial-scale=1">
6
+ <title>🧠 DocMind — Agentic Research Platform</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.0/chart.umd.min.js"></script>
10
+ <style>
11
+ :root {
12
+ --bg: #070d1f; --sb: #0b1120; --card: rgba(255,255,255,.04);
13
+ --border: rgba(255,255,255,.08); --hbg: rgba(255,255,255,.07);
14
+ --hbd: rgba(79,142,247,.3); --text: #e2e8f0; --muted: #8892a4;
15
+ --accent: #4f8ef7; --gold: #f59e0b; --teal: #06b6d4; --green: #22c55e;
16
+ --red: #ef4444; --purple:#a78bfa; --sw: 250px; --r: 12px;
17
+ --font: 'Inter',system-ui,sans-serif; --mono:'JetBrains Mono',monospace;
18
+ }
19
+ [data-theme="light"] {
20
+ --bg:#f8fafc; --sb:#f1f5f9; --card:rgba(0,0,0,.03); --border:rgba(0,0,0,.08);
21
+ --hbg:rgba(0,0,0,.05); --hbd:rgba(37,99,235,.25); --text:#0f172a; --muted:#4b5675;
22
+ }
23
+ *,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
24
+ body{font-family:var(--font);background:var(--bg);color:var(--text);display:flex;min-height:100vh;transition:background .3s,color .3s}
25
+ a{text-decoration:none} code{font-family:var(--mono);font-size:.86em;background:rgba(79,142,247,.1);padding:2px 6px;border-radius:4px}
26
+ /* ── SIDEBAR ── */
27
+ #sb{width:var(--sw);min-width:var(--sw);height:100vh;background:var(--sb);border-right:1px solid var(--border);position:fixed;top:0;left:0;z-index:100;display:flex;flex-direction:column;overflow:hidden;transition:background .3s}
28
+ .sb-logo{padding:18px 16px 14px;border-bottom:1px solid var(--border);display:flex;align-items:center;gap:10px}
29
+ .sb-logo-icon{width:34px;height:34px;border-radius:9px;background:linear-gradient(135deg,#4f8ef7,#06b6d4);display:flex;align-items:center;justify-content:center;font-size:1rem;flex-shrink:0}
30
+ .sb-logo-text{font-size:.88rem;font-weight:900;line-height:1.2} .sb-logo-sub{font-size:.65rem;color:var(--muted);font-weight:500}
31
+ .sb-nav{flex:1;padding:10px 8px;overflow-y:auto;display:flex;flex-direction:column;gap:2px}
32
+ .nav-item{display:flex;align-items:center;gap:11px;padding:9px 12px;border-radius:8px;cursor:pointer;font-size:.82rem;font-weight:600;color:var(--muted);transition:all .18s;border:1px solid transparent;white-space:nowrap}
33
+ .nav-item i{width:17px;text-align:center;font-size:.82rem}
34
+ .nav-item:hover{background:var(--hbg);color:var(--text)}
35
+ .nav-item.active{background:rgba(79,142,247,.12);color:var(--accent);border-color:rgba(79,142,247,.2)}
36
+ .nav-sep{height:1px;background:var(--border);margin:6px 4px}
37
+ .sb-footer{padding:12px 14px;border-top:1px solid var(--border);font-size:.7rem;color:var(--muted)}
38
+ .status-dot{display:inline-block;width:7px;height:7px;border-radius:50%;background:var(--green);margin-right:5px;animation:pulse 2s infinite}
39
+ @keyframes pulse{0%,100%{opacity:1}50%{opacity:.4}}
40
+ /* ── MAIN ── */
41
+ #main{margin-left:var(--sw);flex:1;display:flex;flex-direction:column;min-height:100vh}
42
+ #topbar{position:sticky;top:0;z-index:50;background:var(--bg);border-bottom:1px solid var(--border);padding:0 24px;height:52px;display:flex;align-items:center;justify-content:space-between;backdrop-filter:blur(12px);transition:background .3s}
43
+ #topbar-title{font-size:.9rem;font-weight:800;display:flex;align-items:center;gap:8px}
44
+ #topbar-title i{color:var(--accent)}
45
+ .topbar-right{display:flex;align-items:center;gap:8px}
46
+ .icon-btn{width:32px;height:32px;border-radius:7px;background:var(--card);border:1px solid var(--border);display:flex;align-items:center;justify-content:center;cursor:pointer;font-size:.78rem;color:var(--muted);transition:all .18s}
47
+ .icon-btn:hover{background:var(--hbg);color:var(--text)}
48
+ /* ── PAGES ── */
49
+ .page{display:none;padding:24px;animation:fadeIn .22s ease}
50
+ .page.active{display:block}
51
+ @keyframes fadeIn{from{opacity:0;transform:translateY(6px)}to{opacity:1;transform:none}}
52
+ /* ── CARDS ── */
53
+ .card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:22px;transition:border-color .2s,background .2s}
54
+ .card+.card{margin-top:18px}
55
+ .card:hover{background:var(--hbg);border-color:var(--hbd)}
56
+ .card-title{font-size:.92rem;font-weight:800;margin-bottom:16px;display:flex;align-items:center;gap:8px}
57
+ .card-title i{color:var(--accent)}
58
+ .s-tag{display:inline-block;font-size:.65rem;font-weight:800;text-transform:uppercase;letter-spacing:.1em;padding:2px 8px;border-radius:5px;margin-bottom:9px}
59
+ .s-tag-blue{background:rgba(79,142,247,.1);color:var(--accent);border:1px solid rgba(79,142,247,.2)}
60
+ .s-tag-gold{background:rgba(245,158,11,.1);color:var(--gold);border:1px solid rgba(245,158,11,.2)}
61
+ .s-tag-teal{background:rgba(6,182,212,.1);color:var(--teal);border:1px solid rgba(6,182,212,.2)}
62
+ .grad{background:linear-gradient(135deg,var(--accent),var(--gold));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text}
63
+ /* ── FORM ── */
64
+ textarea,input[type="text"],input[type="file"]{width:100%;background:rgba(0,0,0,.25);color:var(--text);border:1px solid var(--border);border-radius:8px;padding:10px 13px;font-family:var(--font);font-size:.86rem;outline:none;transition:border-color .2s,box-shadow .2s;resize:vertical}
65
+ [data-theme="light"] textarea,[data-theme="light"] input[type="text"]{background:rgba(255,255,255,.7)}
66
+ textarea:focus,input:focus{border-color:var(--accent);box-shadow:0 0 0 3px rgba(79,142,247,.1)}
67
+ textarea{min-height:90px}
68
+ label{font-size:.74rem;font-weight:700;color:var(--muted);display:block;margin-bottom:5px;text-transform:uppercase;letter-spacing:.05em}
69
+ .field{margin-bottom:14px}
70
+ /* ── BUTTONS ── */
71
+ .btn{display:inline-flex;align-items:center;gap:6px;padding:9px 18px;border-radius:8px;font-size:.82rem;font-weight:700;cursor:pointer;border:1px solid transparent;transition:all .18s;font-family:var(--font);white-space:nowrap}
72
+ .btn:disabled{opacity:.4;cursor:not-allowed;transform:none!important}
73
+ .btn-primary{background:linear-gradient(135deg,rgba(79,142,247,.22),rgba(6,182,212,.16));color:var(--accent);border-color:rgba(79,142,247,.38)}
74
+ .btn-primary:hover:not(:disabled){background:linear-gradient(135deg,rgba(79,142,247,.35),rgba(6,182,212,.25));transform:translateY(-1px)}
75
+ .btn-green{background:rgba(34,197,94,.12);color:var(--green);border-color:rgba(34,197,94,.3)}
76
+ .btn-green:hover:not(:disabled){background:rgba(34,197,94,.22);transform:translateY(-1px)}
77
+ .btn-red{background:rgba(239,68,68,.12);color:var(--red);border-color:rgba(239,68,68,.3)}
78
+ .btn-red:hover:not(:disabled){background:rgba(239,68,68,.22);transform:translateY(-1px)}
79
+ .btn-ghost{background:var(--card);color:var(--muted);border-color:var(--border)}
80
+ .btn-ghost:hover:not(:disabled){background:var(--hbg);color:var(--text)}
81
+ .btn-sm{padding:5px 12px;font-size:.74rem}
82
+ /* ── OUTPUT ── */
83
+ .output-box{background:rgba(0,0,0,.3);border:1px solid var(--border);border-radius:8px;padding:14px;font-size:.86rem;line-height:1.7;color:var(--text);min-height:70px;white-space:pre-wrap;word-break:break-word;transition:border-color .2s}
84
+ [data-theme="light"] .output-box{background:rgba(255,255,255,.6)}
85
+ .output-box.lit{border-color:rgba(79,142,247,.25)}
86
+ /* ── SPINNER ── */
87
+ .spinner{display:inline-block;width:16px;height:16px;border:2px solid var(--border);border-top-color:var(--accent);border-radius:50%;animation:spin .7s linear infinite}
88
+ @keyframes spin{to{transform:rotate(360deg)}}
89
+ .loading-row{display:flex;align-items:center;gap:9px;padding:10px 0;color:var(--muted);font-size:.83rem}
90
+ /* ── ERROR ── */
91
+ .err-box{background:rgba(239,68,68,.08);border:1px solid rgba(239,68,68,.22);border-radius:8px;padding:10px 14px;color:#fca5a5;font-size:.82rem;margin-top:8px;display:flex;gap:9px;align-items:flex-start}
92
+ .err-box i{color:var(--red);margin-top:2px;flex-shrink:0}
93
+ /* ── STATS ROW ── */
94
+ .stats-row{display:grid;grid-template-columns:repeat(5,1fr);gap:1px;background:var(--border);border-radius:var(--r);overflow:hidden;margin-bottom:22px}
95
+ .stat-item{background:var(--card);padding:16px 10px;text-align:center}
96
+ .stat-v{font-size:1.5rem;font-weight:900;line-height:1;margin-bottom:3px}
97
+ .stat-l{font-size:.68rem;color:var(--muted)}
98
+ /* ── HOME GRID ── */
99
+ .home-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:12px;margin-top:22px}
100
+ .home-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:18px;cursor:pointer;transition:all .2s;text-align:left}
101
+ .home-card:hover{background:var(--hbg);border-color:var(--hbd);transform:translateY(-3px)}
102
+ .home-card-icon{font-size:1.7rem;margin-bottom:9px}
103
+ .home-card-name{font-size:.88rem;font-weight:800;color:var(--text);margin-bottom:4px}
104
+ .home-card-desc{font-size:.75rem;color:var(--muted);line-height:1.5}
105
+ /* ── AGENT GRAPH SVG ── */
106
+ #agent-graph-wrap{width:100%;overflow-x:auto;margin:18px 0}
107
+ #agent-graph{width:100%;max-width:700px;height:200px;display:block;margin:0 auto}
108
+ .ag-node{transition:all .4s}
109
+ .ag-node-idle rect{fill:rgba(136,146,164,.07);stroke:rgba(136,146,164,.3);stroke-width:1.5}
110
+ .ag-node-running rect{fill:rgba(79,142,247,.15);stroke:var(--accent);stroke-width:2;filter:drop-shadow(0 0 6px rgba(79,142,247,.5))}
111
+ .ag-node-complete rect{fill:rgba(34,197,94,.1);stroke:var(--green);stroke-width:1.5}
112
+ .ag-node-error rect{fill:rgba(239,68,68,.1);stroke:var(--red);stroke-width:1.5}
113
+ .ag-node-running .ag-pulse{animation:nodePulse 1.2s ease infinite}
114
+ @keyframes nodePulse{0%,100%{opacity:.6}50%{opacity:1}}
115
+ .ag-label{font-size:10px;font-weight:700;fill:var(--text);text-anchor:middle;dominant-baseline:middle;font-family:'Inter',sans-serif}
116
+ .ag-sub{font-size:8.5px;fill:var(--muted);text-anchor:middle;dominant-baseline:middle;font-family:'Inter',sans-serif}
117
+ .ag-arrow{stroke:rgba(136,146,164,.35);stroke-width:1.5;fill:none;marker-end:url(#arrow)}
118
+ /* ── TRACE LOG ── */
119
+ #trace-log{display:flex;flex-direction:column;gap:6px;max-height:320px;overflow-y:auto;padding-right:4px}
120
+ .trace-step{display:flex;gap:10px;padding:9px 13px;background:var(--card);border:1px solid var(--border);border-radius:8px;font-size:.8rem;animation:fadeIn .2s ease}
121
+ .trace-badge{display:inline-flex;align-items:center;justify-content:center;padding:2px 8px;border-radius:6px;font-size:.68rem;font-weight:800;text-transform:uppercase;flex-shrink:0;white-space:nowrap;min-width:70px}
122
+ .badge-planner {background:rgba(79,142,247,.15);color:var(--accent)}
123
+ .badge-retriever{background:rgba(6,182,212,.15);color:var(--teal)}
124
+ .badge-grader {background:rgba(245,158,11,.15);color:var(--gold)}
125
+ .badge-rewriter {background:rgba(239,68,68,.12);color:var(--red)}
126
+ .badge-generator{background:rgba(167,139,250,.15);color:var(--purple)}
127
+ .badge-critic {background:rgba(239,68,68,.15);color:var(--red)}
128
+ .badge-human_review{background:rgba(34,197,94,.12);color:var(--green)}
129
+ .trace-msg{color:var(--muted);flex:1;line-height:1.5}
130
+ .trace-ts{font-size:.68rem;color:var(--muted);opacity:.6;flex-shrink:0}
131
+ .trace-lat{font-size:.68rem;color:var(--teal);flex-shrink:0;white-space:nowrap}
132
+ /* ── ANSWER CARD ── */
133
+ #answer-card{display:none;margin-top:18px}
134
+ .source-chips{display:flex;flex-wrap:wrap;gap:6px;margin-top:10px}
135
+ .source-chip{background:rgba(79,142,247,.1);border:1px solid rgba(79,142,247,.2);border-radius:6px;padding:2px 9px;font-size:.72rem;color:var(--accent);font-weight:600}
136
+ /* ── REVIEW CARDS ── */
137
+ #review-list{display:flex;flex-direction:column;gap:14px}
138
+ .review-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:20px}
139
+ .review-card-q{font-size:.88rem;font-weight:700;color:var(--text);margin-bottom:8px}
140
+ .review-card-a{font-size:.82rem;color:var(--muted);line-height:1.6;margin-bottom:10px;padding:10px;background:rgba(0,0,0,.2);border-radius:8px;white-space:pre-wrap}
141
+ .review-card-c{font-size:.76rem;color:var(--red);margin-bottom:12px;background:rgba(239,68,68,.06);border:1px solid rgba(239,68,68,.15);border-radius:6px;padding:8px 12px}
142
+ .review-actions{display:flex;gap:8px}
143
+ /* ── OBSERVABILITY ── */
144
+ .obs-grid{display:grid;grid-template-columns:repeat(4,1fr);gap:12px;margin-bottom:20px}
145
+ .obs-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:16px;text-align:center}
146
+ .obs-val{font-size:1.6rem;font-weight:900;margin-bottom:4px}
147
+ .obs-lbl{font-size:.72rem;color:var(--muted)}
148
+ .chart-wrap{position:relative;height:260px}
149
+ /* ── UPLOAD DROP ZONE ── */
150
+ #drop-zone{border:2px dashed var(--border);border-radius:var(--r);padding:36px 24px;text-align:center;cursor:pointer;transition:all .2s}
151
+ #drop-zone.drag-over{border-color:var(--accent);background:rgba(79,142,247,.06)}
152
+ #drop-zone i{font-size:2.2rem;color:var(--muted);margin-bottom:10px;display:block}
153
+ #drop-zone p{color:var(--muted);font-size:.85rem}
154
+ /* ── DOC LIST ── */
155
+ .doc-item{display:flex;align-items:center;gap:10px;padding:9px 13px;background:var(--card);border:1px solid var(--border);border-radius:8px;font-size:.82rem;margin-bottom:6px}
156
+ .doc-icon{color:var(--accent)}
157
+ .doc-name{font-weight:600;flex:1}
158
+ .doc-chunks{font-size:.72rem;color:var(--teal)}
159
+ /* ── TOOL PLAYGROUND ── */
160
+ .tool-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:16px}
161
+ .tool-card{background:var(--card);border:1px solid var(--border);border-radius:var(--r);padding:20px}
162
+ .tool-icon{font-size:1.8rem;margin-bottom:8px}
163
+ .tool-name{font-size:.88rem;font-weight:800;margin-bottom:4px}
164
+ .tool-desc{font-size:.74rem;color:var(--muted);margin-bottom:12px;line-height:1.5}
165
+ /* ── RESPONSIVE ── */
166
+ @media(max-width:900px){
167
+ #sb{width:52px;min-width:52px}
168
+ .sb-logo-text,.sb-logo-sub,.nav-item span,.sb-footer{display:none}
169
+ .sb-logo{justify-content:center;padding:12px 8px}
170
+ .nav-item{justify-content:center;padding:11px}
171
+ #main{margin-left:52px}
172
+ .home-grid,.tool-grid{grid-template-columns:1fr 1fr}
173
+ .obs-grid{grid-template-columns:repeat(2,1fr)}
174
+ .stats-row{grid-template-columns:repeat(3,1fr)}
175
+ }
176
+ @media(max-width:600px){
177
+ .page{padding:14px}
178
+ .home-grid,.tool-grid{grid-template-columns:1fr}
179
+ .stats-row{grid-template-columns:repeat(2,1fr)}
180
+ }
181
+ </style>
182
+ </head>
183
+ <body>
184
+
185
+ <!-- ── SIDEBAR ── -->
186
+ <nav id="sb">
187
+ <div class="sb-logo">
188
+ <div class="sb-logo-icon">🧠</div>
189
+ <div><div class="sb-logo-text">DocMind</div><div class="sb-logo-sub">Agentic Research</div></div>
190
+ </div>
191
+ <div class="sb-nav">
192
+ <div class="nav-item active" onclick="nav('home')" id="nav-home"> <i class="fas fa-home"></i> <span>Overview</span></div>
193
+ <div class="nav-item" onclick="nav('upload')" id="nav-upload"> <i class="fas fa-upload"></i> <span>Upload &amp; Index</span></div>
194
+ <div class="nav-sep"></div>
195
+ <div class="nav-item" onclick="nav('research')" id="nav-research"><i class="fas fa-search"></i> <span>Research Query</span></div>
196
+ <div class="nav-item" onclick="nav('review')" id="nav-review"> <i class="fas fa-eye"></i> <span>Human Review</span><span id="review-badge" style="display:none;background:var(--red);color:#fff;border-radius:10px;padding:1px 6px;font-size:.65rem;margin-left:auto">0</span></div>
197
+ <div class="nav-sep"></div>
198
+ <div class="nav-item" onclick="nav('obs')" id="nav-obs"> <i class="fas fa-chart-line"></i> <span>Observability</span></div>
199
+ <div class="nav-item" onclick="nav('tools')" id="nav-tools"> <i class="fas fa-tools"></i> <span>Tool Playground</span></div>
200
+ </div>
201
+ <div class="sb-footer">
202
+ <div><span class="status-dot"></span><span id="sb-status">Checking…</span></div>
203
+ </div>
204
+ </nav>
205
+
206
+ <!-- ── MAIN ── -->
207
+ <div id="main">
208
+
209
+ <div id="topbar">
210
+ <div id="topbar-title"><i class="fas fa-wand-magic-sparkles"></i><span id="topbar-label">Overview</span></div>
211
+ <div class="topbar-right">
212
+ <div class="icon-btn" onclick="toggleTheme()" title="Toggle theme"><i class="fas fa-moon" id="theme-icon"></i></div>
213
+ <a href="https://github.com/mnoorchenar/docmind" target="_blank" class="icon-btn" title="GitHub"><i class="fab fa-github"></i></a>
214
+ </div>
215
+ </div>
216
+
217
+ <!-- ════════════════════════════════
218
+ PAGE: HOME
219
+ ═════════════════════════════════ -->
220
+ <div class="page active" id="page-home">
221
+ <div style="text-align:center;padding:28px 0 20px">
222
+ <h1 style="font-size:clamp(1.5rem,3vw,2.2rem);font-weight:900;margin-bottom:8px">🧠 <span class="grad">DocMind</span></h1>
223
+ <p style="color:var(--muted);font-size:.92rem;max-width:500px;margin:0 auto;line-height:1.65">A production-grade agentic research platform. Five specialized LangGraph agents collaborate to retrieve, grade, generate, and critique answers from your documents.</p>
224
+ </div>
225
+ <div class="stats-row" id="home-stats">
226
+ <div class="stat-item"><div class="stat-v grad" id="st-docs">0</div><div class="stat-l">PDFs Indexed</div></div>
227
+ <div class="stat-item"><div class="stat-v grad" id="st-chunks">0</div><div class="stat-l">Chunks Stored</div></div>
228
+ <div class="stat-item"><div class="stat-v grad" id="st-queries">0</div><div class="stat-l">Queries Run</div></div>
229
+ <div class="stat-item"><div class="stat-v grad" id="st-complete">0</div><div class="stat-l">Completed</div></div>
230
+ <div class="stat-item"><div class="stat-v grad" id="st-review">0</div><div class="stat-l">Pending Review</div></div>
231
+ </div>
232
+ <div class="home-grid">
233
+ <div class="home-card" onclick="nav('upload')"><div class="home-card-icon">📤</div><div class="home-card-name">Upload &amp; Index</div><div class="home-card-desc">Upload PDFs. Chunks are embedded with BAAI/bge-small-en-v1.5 locally and stored in a FAISS + BM25 hybrid index.</div></div>
234
+ <div class="home-card" onclick="nav('research')"><div class="home-card-icon">🔍</div><div class="home-card-name">Research Query</div><div class="home-card-desc">Ask any question. Watch the five LangGraph agents plan, retrieve, grade, generate, and critique in real time.</div></div>
235
+ <div class="home-card" onclick="nav('review')"><div class="home-card-icon">👁️</div><div class="home-card-name">Human Review</div><div class="home-card-desc">Answers flagged by the Critic agent appear here for your approval before being returned to the user.</div></div>
236
+ <div class="home-card" onclick="nav('obs')"><div class="home-card-icon">📊</div><div class="home-card-name">Observability</div><div class="home-card-desc">Live trace of every agent decision, per-agent latency, token usage, and retrieval quality scores.</div></div>
237
+ <div class="home-card" onclick="nav('tools')"><div class="home-card-icon">🔧</div><div class="home-card-name">Tool Playground</div><div class="home-card-desc">Test web search, calculator, and sandboxed code execution — the three tools the Planner agent can invoke.</div></div>
238
+ <div class="home-card" style="cursor:default">
239
+ <div class="home-card-icon">🤗</div>
240
+ <div class="home-card-name">Free HF Models</div>
241
+ <div class="home-card-desc" id="hf-status">Mistral-7B · Zephyr-7B · bge-small. Token: <span id="hf-token-status" style="color:var(--red)">not set</span></div>
242
+ </div>
243
+ </div>
244
+ </div>
245
+
246
+ <!-- ════════════════════════════════
247
+ PAGE: UPLOAD
248
+ ═════════════════════════════════ -->
249
+ <div class="page" id="page-upload">
250
+ <div class="s-tag s-tag-blue">FAISS + BM25 Hybrid Index · BAAI/bge-small-en-v1.5 (local)</div>
251
+ <div class="card-title" style="margin-bottom:16px"><i class="fas fa-upload"></i> Upload &amp; Index Documents</div>
252
+ <div class="card">
253
+ <div id="drop-zone" onclick="document.getElementById('file-input').click()" ondragover="dropOver(event)" ondragleave="dropLeave(event)" ondrop="dropFile(event)">
254
+ <i class="fas fa-file-pdf"></i>
255
+ <p><strong style="color:var(--text)">Click to upload</strong> or drag a PDF here</p>
256
+ <p style="margin-top:4px;font-size:.75rem">PDF files only · Max recommended 20 pages for free HF inference tier</p>
257
+ </div>
258
+ <input type="file" id="file-input" accept=".pdf" style="display:none" onchange="uploadFile(this.files[0])">
259
+ <div id="upload-progress" style="display:none;margin-top:12px">
260
+ <div class="loading-row"><div class="spinner"></div> Chunking, embedding, and indexing…</div>
261
+ </div>
262
+ <div id="upload-result"></div>
263
+ </div>
264
+ <div class="card" style="margin-top:16px">
265
+ <div class="card-title"><i class="fas fa-database"></i> Indexed Documents</div>
266
+ <div id="doc-list"><p style="color:var(--muted);font-size:.83rem">No documents indexed yet.</p></div>
267
+ </div>
268
+ </div>
269
+
270
+ <!-- ════════════════════════════════
271
+ PAGE: RESEARCH
272
+ ═════════════════════════════════ -->
273
+ <div class="page" id="page-research">
274
+ <div class="s-tag s-tag-blue">LangGraph · 5 Agents · Corrective RAG · Human-in-the-Loop</div>
275
+ <div class="card-title" style="margin-bottom:14px"><i class="fas fa-search"></i> Research Query</div>
276
+
277
+ <div class="card">
278
+ <div class="field">
279
+ <label>Your Research Question</label>
280
+ <textarea id="q-input" placeholder="Ask anything about your uploaded documents…" rows="3" onkeydown="researchKeydown(event)"></textarea>
281
+ </div>
282
+ <button class="btn btn-primary" id="q-btn" onclick="runResearch()"><i class="fas fa-bolt"></i> Run Research</button>
283
+ <div id="q-err"></div>
284
+ </div>
285
+
286
+ <!-- Agent Graph -->
287
+ <div class="card" id="graph-card" style="margin-top:16px;display:none">
288
+ <div class="card-title"><i class="fas fa-project-diagram"></i> LangGraph Agent State Machine</div>
289
+ <div id="agent-graph-wrap">
290
+ <svg id="agent-graph" viewBox="0 0 700 200" xmlns="http://www.w3.org/2000/svg">
291
+ <defs>
292
+ <marker id="arrow" viewBox="0 0 10 10" refX="9" refY="5" markerWidth="6" markerHeight="6" orient="auto-start-reverse">
293
+ <path d="M 0 0 L 10 5 L 0 10 z" fill="rgba(136,146,164,.4)"/>
294
+ </marker>
295
+ </defs>
296
+ <!-- Arrows -->
297
+ <line class="ag-arrow" x1="95" y1="70" x2="160" y2="70" marker-end="url(#arrow)"/>
298
+ <line class="ag-arrow" x1="245" y1="70" x2="310" y2="70" marker-end="url(#arrow)"/>
299
+ <line class="ag-arrow" x1="395" y1="70" x2="460" y2="70" marker-end="url(#arrow)"/>
300
+ <line class="ag-arrow" x1="545" y1="70" x2="610" y2="70" marker-end="url(#arrow)"/>
301
+ <!-- Rewriter loop arrow -->
302
+ <path class="ag-arrow" d="M 350 105 Q 350 150 280 150 Q 210 150 205 105" marker-end="url(#arrow)" stroke-dasharray="4,3"/>
303
+ <!-- Nodes -->
304
+ <g class="ag-node ag-node-idle" id="gn-planner">
305
+ <rect x="5" y="44" width="90" height="52" rx="10"/>
306
+ <text class="ag-label ag-pulse" x="50" y="63">PLANNER</text>
307
+ <text class="ag-sub" x="50" y="80">Mistral-7B</text>
308
+ </g>
309
+ <g class="ag-node ag-node-idle" id="gn-retriever">
310
+ <rect x="160" y="44" width="90" height="52" rx="10"/>
311
+ <text class="ag-label ag-pulse" x="205" y="63">RETRIEVER</text>
312
+ <text class="ag-sub" x="205" y="80">FAISS+BM25</text>
313
+ </g>
314
+ <g class="ag-node ag-node-idle" id="gn-grader">
315
+ <rect x="310" y="44" width="90" height="52" rx="10"/>
316
+ <text class="ag-label ag-pulse" x="355" y="63">GRADER</text>
317
+ <text class="ag-sub" x="355" y="80">Zephyr-7B</text>
318
+ </g>
319
+ <g class="ag-node ag-node-idle" id="gn-rewriter" style="opacity:.5">
320
+ <rect x="265" y="130" width="80" height="40" rx="8"/>
321
+ <text class="ag-label ag-pulse" x="305" y="150">REWRITER</text>
322
+ </g>
323
+ <g class="ag-node ag-node-idle" id="gn-generator">
324
+ <rect x="460" y="44" width="90" height="52" rx="10"/>
325
+ <text class="ag-label ag-pulse" x="505" y="63">GENERATOR</text>
326
+ <text class="ag-sub" x="505" y="80">Mistral-7B</text>
327
+ </g>
328
+ <g class="ag-node ag-node-idle" id="gn-critic">
329
+ <rect x="610" y="44" width="85" height="52" rx="10"/>
330
+ <text class="ag-label ag-pulse" x="652" y="63">CRITIC</text>
331
+ <text class="ag-sub" x="652" y="80">Zephyr-7B</text>
332
+ </g>
333
+ </svg>
334
+ </div>
335
+
336
+ <!-- Trace Log -->
337
+ <div style="margin-top:4px">
338
+ <div style="font-size:.72rem;font-weight:700;color:var(--muted);text-transform:uppercase;letter-spacing:.07em;margin-bottom:8px">Agent Trace</div>
339
+ <div id="trace-log"><p style="color:var(--muted);font-size:.8rem">Waiting for agent execution…</p></div>
340
+ </div>
341
+ </div>
342
+
343
+ <!-- Answer Card -->
344
+ <div class="card" id="answer-card">
345
+ <div class="card-title"><i class="fas fa-check-circle" style="color:var(--green)"></i> Research Answer</div>
346
+ <div class="output-box lit" id="answer-output"></div>
347
+ <div class="source-chips" id="source-chips"></div>
348
+ <div style="display:flex;gap:8px;margin-top:10px">
349
+ <button class="btn btn-ghost btn-sm" onclick="copyText('answer-output',this)"><i class="fas fa-copy"></i> Copy</button>
350
+ </div>
351
+ </div>
352
+
353
+ <!-- Flagged for review -->
354
+ <div class="card" id="flagged-card" style="display:none;border-color:rgba(245,158,11,.3)">
355
+ <div class="card-title"><i class="fas fa-exclamation-triangle" style="color:var(--gold)"></i> Flagged for Human Review</div>
356
+ <p style="color:var(--muted);font-size:.85rem">The Critic agent flagged this answer for potential quality issues. Go to <strong>Human Review</strong> to approve or reject it.</p>
357
+ <button class="btn btn-ghost btn-sm" style="margin-top:10px" onclick="nav('review')"><i class="fas fa-eye"></i> Go to Review Queue</button>
358
+ </div>
359
+ </div>
360
+
361
+ <!-- ════════════════════════════════
362
+ PAGE: HUMAN REVIEW
363
+ ═════════════════════════════════ -->
364
+ <div class="page" id="page-review">
365
+ <div class="s-tag s-tag-gold">Human-in-the-Loop · Critic Agent Escalations</div>
366
+ <div class="card-title" style="margin-bottom:16px"><i class="fas fa-eye"></i> Human Review Queue</div>
367
+ <div id="review-list"><p style="color:var(--muted);font-size:.84rem">No answers pending review.</p></div>
368
+ </div>
369
+
370
+ <!-- ════════════════════════════════
371
+ PAGE: OBSERVABILITY
372
+ ═════════════════════════════════ -->
373
+ <div class="page" id="page-obs">
374
+ <div class="s-tag s-tag-teal">LangSmith-Style Tracing · Per-Agent Metrics</div>
375
+ <div class="card-title" style="margin-bottom:16px"><i class="fas fa-chart-line"></i> Observability Dashboard</div>
376
+ <div class="obs-grid" id="obs-cards">
377
+ <div class="obs-card"><div class="obs-val grad" id="obs-total">0</div><div class="obs-lbl">Total Agent Calls</div></div>
378
+ <div class="obs-card"><div class="obs-val grad" id="obs-queries">0</div><div class="obs-lbl">Total Queries</div></div>
379
+ <div class="obs-card"><div class="obs-val grad" id="obs-avg-plan">—</div><div class="obs-lbl">Avg Planner Latency</div></div>
380
+ <div class="obs-card"><div class="obs-val grad" id="obs-avg-gen">—</div><div class="obs-lbl">Avg Generator Latency</div></div>
381
+ </div>
382
+ <div class="card">
383
+ <div class="card-title"><i class="fas fa-tachometer-alt"></i> Agent Call Distribution</div>
384
+ <div class="chart-wrap"><canvas id="obs-chart"></canvas></div>
385
+ </div>
386
+ <div class="card" style="margin-top:16px">
387
+ <div class="card-title"><i class="fas fa-stopwatch"></i> Avg Latency per Agent (ms)</div>
388
+ <div class="chart-wrap"><canvas id="lat-chart"></canvas></div>
389
+ </div>
390
+ </div>
391
+
392
+ <!-- ════════════════════════════════
393
+ PAGE: TOOLS
394
+ ═════════════════════════════════ -->
395
+ <div class="page" id="page-tools">
396
+ <div class="s-tag s-tag-blue">Function Calling · Tool Use</div>
397
+ <div class="card-title" style="margin-bottom:16px"><i class="fas fa-tools"></i> Tool Playground</div>
398
+ <div class="tool-grid">
399
+
400
+ <div class="tool-card">
401
+ <div class="tool-icon">🌐</div>
402
+ <div class="tool-name">Web Search</div>
403
+ <div class="tool-desc">DuckDuckGo free search — no API key required. Used by the Planner when web context is needed.</div>
404
+ <div class="field"><label>Search Query</label><textarea id="ws-inp" rows="2" placeholder="e.g. LangGraph tutorial 2025"></textarea></div>
405
+ <button class="btn btn-primary btn-sm" id="ws-btn" onclick="runTool('web_search','ws-inp','ws-out',this)"><i class="fas fa-search"></i> Search</button>
406
+ <div id="ws-out" class="output-box" style="margin-top:10px;min-height:60px;display:none"></div>
407
+ </div>
408
+
409
+ <div class="tool-card">
410
+ <div class="tool-icon">🧮</div>
411
+ <div class="tool-name">Calculator</div>
412
+ <div class="tool-desc">Safe AST-based math evaluator supporting +, −, ×, ÷, ^, and all Python math module functions.</div>
413
+ <div class="field"><label>Expression</label><textarea id="calc-inp" rows="2" placeholder="e.g. sqrt(144) + log(100)"></textarea></div>
414
+ <button class="btn btn-primary btn-sm" id="calc-btn" onclick="runTool('calculator','calc-inp','calc-out',this)"><i class="fas fa-equals"></i> Calculate</button>
415
+ <div id="calc-out" class="output-box" style="margin-top:10px;min-height:60px;display:none"></div>
416
+ </div>
417
+
418
+ <div class="tool-card">
419
+ <div class="tool-icon">💻</div>
420
+ <div class="tool-name">Code Runner</div>
421
+ <div class="tool-desc">Sandboxed Python execution with safe builtins only (no file I/O, no network). Captures stdout output.</div>
422
+ <div class="field"><label>Python Code</label><textarea id="code-inp" rows="4" placeholder="x = [i**2 for i in range(10)]&#10;print(sum(x))"></textarea></div>
423
+ <button class="btn btn-primary btn-sm" id="code-btn" onclick="runTool('code','code-inp','code-out',this)"><i class="fas fa-play"></i> Run</button>
424
+ <div id="code-out" class="output-box" style="margin-top:10px;min-height:60px;display:none;font-family:var(--mono)"></div>
425
+ </div>
426
+
427
+ </div>
428
+ </div>
429
+
430
+ </div><!-- /#main -->
431
+
432
+ <!-- ════════════════════════════════════════════════════
433
+ JAVASCRIPT
434
+ ═════════════════════════════════════════════════════ -->
435
+ <script>
436
+ // ── GLOBALS ─────────────────────────────────────────────────────────────
437
+ let currentQid = null;
438
+ let pollTimer = null;
439
+ let seenSteps = 0;
440
+ let uploadedDocs = []; // {name, chunks}
441
+ let obsChart = null;
442
+ let latChart = null;
443
+
444
+ const PAGE_LABELS = {
445
+ home:'Overview', upload:'Upload & Index', research:'Research Query',
446
+ review:'Human Review', obs:'Observability', tools:'Tool Playground'
447
+ };
448
+
449
+ const NODE_AGENT_MAP = {
450
+ planner:'gn-planner', retriever:'gn-retriever', grader:'gn-grader',
451
+ rewriter:'gn-rewriter', generator:'gn-generator', critic:'gn-critic',
452
+ human_review:'gn-critic'
453
+ };
454
+
455
+ // ── NAVIGATION ──────────────────────────────────────────────────────────
456
// Show the page identified by `id`, highlight its sidebar entry, update the
// top-bar label, and refresh pages whose data is loaded on demand.
function nav(id) {
  for (const page of document.querySelectorAll('.page')) page.classList.remove('active');
  for (const item of document.querySelectorAll('.nav-item')) item.classList.remove('active');

  const page = document.getElementById('page-' + id);
  page.classList.add('active');
  const link = document.getElementById('nav-' + id);
  link.classList.add('active');

  document.getElementById('topbar-label').textContent = PAGE_LABELS[id];

  switch (id) {
    case 'obs': loadObs(); break;
    case 'review': loadReview(); break;
  }
}
465
+
466
+ // ── THEME ────────────────────────────────────────────────────────────────
467
// Icon class for a theme: moon while dark, sun while light. Shared by the
// toggle and the start-up code so both always agree (previously the toggle
// chose the opposite icon from the one restored after a reload).
function themeIcon(theme) {
  return theme === 'dark' ? 'fas fa-moon' : 'fas fa-sun';
}

// Flip between the dark and light themes and persist the choice.
function toggleTheme() {
  const root = document.documentElement;
  const next = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
  root.setAttribute('data-theme', next);
  document.getElementById('theme-icon').className = themeIcon(next);
  localStorage.setItem('dm-theme', next);
}

// Restore the saved theme (default: dark) as soon as the script runs.
(function () {
  const saved = localStorage.getItem('dm-theme') || 'dark';
  document.documentElement.setAttribute('data-theme', saved);
  document.getElementById('theme-icon').className = themeIcon(saved);
})();
475
+
476
+ // ── HEALTH CHECK ─────────────────────────────────────────────────────────
477
// Query /health once and reflect whether the HF token is configured in the
// sidebar status line and on the home page card.
async function checkHealth() {
  const status = document.getElementById('sb-status');
  try {
    const r = await fetch('/health');
    const d = await r.json();
    status.textContent = d.token_set ? 'HF Token Active' : 'Token Missing';
    const tok = document.getElementById('hf-token-status');
    tok.textContent = d.token_set ? 'set ✅' : 'not set ❌';
    tok.style.color = d.token_set ? 'var(--green)' : 'var(--red)';
  } catch (e) {
    // Do not leave the sidebar stuck on "Checking…" when the request fails.
    status.textContent = 'Server unreachable';
  }
}
485
// Fetch /api/stats and copy the counters into the home page tiles and the
// sidebar review badge. Failures are ignored; the next poll retries.
async function refreshStats() {
  try {
    const resp = await fetch('/api/stats');
    const stats = await resp.json();

    // [element id, key in the stats payload]
    const tiles = [
      ['st-docs', 'docs_indexed'],
      ['st-chunks', 'chunks_stored'],
      ['st-queries', 'queries_run'],
      ['st-complete', 'queries_complete'],
      ['st-review', 'pending_review'],
    ];
    for (const [id, key] of tiles) {
      document.getElementById(id).textContent = stats[key];
    }

    // Review badge
    const badge = document.getElementById('review-badge');
    if (stats.pending_review > 0) {
      badge.style.display = 'inline';
    } else {
      badge.style.display = 'none';
    }
    badge.textContent = stats.pending_review;
  } catch (e) {}
}
499
+ checkHealth();
500
+ refreshStats();
501
+ setInterval(refreshStats, 8000);
502
+
503
+ // ── UPLOAD ───────────────────────────────────────────────────────────────
504
// Drag entered the drop zone: allow dropping and highlight the zone.
function dropOver(e) {
  e.preventDefault();
  const zone = document.getElementById('drop-zone');
  zone.classList.add('drag-over');
}

// Drag left the drop zone: remove the highlight.
function dropLeave(e) {
  const zone = document.getElementById('drop-zone');
  zone.classList.remove('drag-over');
}

// File dropped: clear the highlight and upload the first dropped file, if any.
function dropFile(e) {
  e.preventDefault();
  dropLeave(e);
  const first = e.dataTransfer.files[0];
  if (first) uploadFile(first);
}
507
+
508
// Upload one PDF to /api/upload and report the indexing result.
//   file: the File chosen via the picker or dropped on the drop zone.
async function uploadFile(file) {
  // Compare the extension case-insensitively so "REPORT.PDF" is accepted.
  if (!file || !file.name.toLowerCase().endsWith('.pdf')) {
    showErr('upload-result', 'Only PDF files are supported.'); return;
  }
  const progress = document.getElementById('upload-progress');
  const result = document.getElementById('upload-result');
  progress.style.display = 'block';
  result.innerHTML = '';
  const fd = new FormData(); fd.append('file', file);
  try {
    const r = await fetch('/api/upload', {method:'POST', body:fd});
    const d = await r.json();
    if (d.error) { showErr('upload-result', d.error); return; }
    uploadedDocs.push({name: d.filename, chunks: d.chunks});
    renderDocList();
    // The filename is echoed back from user input, so escape it before it is
    // placed in markup; the counters are coerced to numbers for the same reason.
    result.innerHTML =
      `<div style="background:rgba(34,197,94,.08);border:1px solid rgba(34,197,94,.2);border-radius:8px;padding:10px 14px;margin-top:10px;font-size:.83rem;color:var(--green)">✅ <strong>${esc(d.filename)}</strong> indexed — ${Number(d.chunks)} chunks · Total: ${Number(d.total_chunks)} chunks across ${Number(d.total_docs)} doc(s).</div>`;
    refreshStats();
  } catch (e) {
    showErr('upload-result', e.message);
  } finally {
    progress.style.display = 'none';
    // Reset the picker so choosing the same file again fires `onchange`.
    const input = document.getElementById('file-input');
    if (input) input.value = '';
  }
}
530
+
531
// Render the documents uploaded in this browser session (`uploadedDocs`)
// into the "Indexed Documents" card.
function renderDocList() {
  const el = document.getElementById('doc-list');
  if (!uploadedDocs.length) { el.innerHTML = '<p style="color:var(--muted);font-size:.83rem">No documents indexed yet.</p>'; return; }
  // File names originate from user uploads; escape them so a crafted name
  // cannot inject markup.
  el.innerHTML = uploadedDocs.map(d => `
    <div class="doc-item">
      <i class="fas fa-file-pdf doc-icon"></i>
      <span class="doc-name">${esc(d.name)}</span>
      <span class="doc-chunks">${Number(d.chunks)} chunks</span>
    </div>`).join('');
}
541
+
542
+ // ── RESEARCH ─────────────────────────────────────────────────────────────
543
// Submit the question on Enter; Shift+Enter keeps the default newline.
function researchKeydown(e) {
  if (e.key !== 'Enter' || e.shiftKey) return;
  e.preventDefault();
  runResearch();
}
544
+
545
// Start a research run for the question in #q-input and begin polling its
// trace every two seconds.
async function runResearch() {
  const btn = document.getElementById('q-btn');
  // The Enter-key handler calls this directly, bypassing the disabled button;
  // refuse to start a second run while one is in flight.
  if (btn.disabled) return;
  const q = document.getElementById('q-input').value.trim();
  if (!q) return;

  // Stop any poller left over from a previous run so it is not leaked when
  // `pollTimer` is overwritten below.
  clearInterval(pollTimer);
  pollTimer = null;

  clearErr('q-err');
  resetGraph();
  document.getElementById('graph-card').style.display = 'block';
  document.getElementById('answer-card').style.display = 'none';
  document.getElementById('flagged-card').style.display = 'none';
  document.getElementById('trace-log').innerHTML = '<p style="color:var(--muted);font-size:.8rem">Starting agents…</p>';
  btn.disabled = true;
  seenSteps = 0;

  try {
    const r = await fetch('/api/research', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({question: q})});
    const d = await r.json();
    if (d.error) { showErr('q-err', d.error); btn.disabled = false; return; }
    currentQid = d.query_id;
    pollTimer = setInterval(pollTrace, 2000);
  } catch (e) {
    showErr('q-err', e.message);
    btn.disabled = false;
  }
}
568
+
569
// Poll /api/trace/<id> for the current query, append new trace steps, and
// finish the run once the status is complete, pending_review, or error.
async function pollTrace() {
  const qid = currentQid;
  if (!qid) return;
  try {
    const r = await fetch('/api/trace/' + qid);
    const d = await r.json();
    // A newer run may have started while this request was in flight.
    if (qid !== currentQid) return;
    renderTraceSteps(d.trace || []);

    const finished = d.status === 'complete' || d.status === 'pending_review' || d.status === 'error';
    if (!finished) return;

    clearInterval(pollTimer);
    pollTimer = null;
    document.getElementById('q-btn').disabled = false;

    if (d.status === 'complete') {
      // Render even without a result so the user sees "No answer generated."
      renderAnswer(d.result || {});
    } else if (d.status === 'pending_review') {
      document.getElementById('flagged-card').style.display = 'block';
      loadReview();
    } else {
      // Report the failure even when the server attached no result object.
      showErr('q-err', (d.result && d.result.error) || 'Unknown error.');
    }
    refreshStats();
  } catch (e) {
    // Transient network/parse failure: the next tick retries.
  }
}
590
+
591
// Append trace steps not yet shown (tracked by `seenSteps`) to the trace log
// and update the matching node in the agent graph.
//   steps: full array of trace entries {agent, message, status, latency_ms, ts}.
function renderTraceSteps(steps) {
  const log = document.getElementById('trace-log');
  if (steps.length === 0) return;
  if (seenSteps === 0) log.innerHTML = '';
  for (let i = seenSteps; i < steps.length; i++) {
    const s = steps[i];
    // Restrict the class suffix to identifier characters; escape everything
    // that is rendered as text.
    const badgeClass = String(s.agent || '').replace(/[^\w-]/g, '');
    const lat = s.latency_ms > 0 ? `<span class="trace-lat">${Number(s.latency_ms)}ms</span>` : '';
    // insertAdjacentHTML appends without re-parsing the existing log, unlike
    // `innerHTML +=`, which rebuilt every earlier row on each step.
    log.insertAdjacentHTML('beforeend', `
      <div class="trace-step">
        <span class="trace-badge badge-${badgeClass}">${esc(s.agent)}</span>
        <span class="trace-msg">${esc(s.message)}</span>
        ${lat}
        <span class="trace-ts">${esc(s.ts)}</span>
      </div>`);
    setNodeStatus(s.agent, s.status === 'running' ? 'running' : 'complete');
  }
  seenSteps = steps.length;
  log.scrollTop = log.scrollHeight;
}
610
+
611
// Show the answer card with the generated text and one chip per distinct
// "source p.page" pair from the graded documents.
function renderAnswer(result) {
  document.getElementById('answer-card').style.display = 'block';
  document.getElementById('answer-output').textContent = result.generation || 'No answer generated.';
  const chips = document.getElementById('source-chips');
  chips.innerHTML = '';
  if (result.graded_docs) {
    const sources = [...new Set(result.graded_docs.map(d => `${d.source} p.${d.page}`))];
    // Build chips as DOM nodes with textContent: source names come from
    // uploaded file names and must not be interpreted as markup.
    for (const s of sources) {
      const chip = document.createElement('span');
      chip.className = 'source-chip';
      chip.textContent = `📄 ${s}`;
      chips.appendChild(chip);
    }
  }
}
621
+
622
// Put every agent node back into the idle state and dim the rewriter node.
function resetGraph() {
  const agents = ['planner','retriever','grader','rewriter','generator','critic'];
  for (const agent of agents) {
    setNodeStatus(agent, 'idle');
  }
  const rewriter = document.getElementById('gn-rewriter');
  rewriter.style.opacity = '.5';
}
626
+
627
// Set the visual state of the graph node that represents `agent`.
//   agent:  key of NODE_AGENT_MAP; unknown agents are ignored.
//   status: suffix for the `ag-node-<status>` class (idle, running, complete).
function setNodeStatus(agent, status) {
  const id = NODE_AGENT_MAP[agent];
  if (!id) return;
  const el = document.getElementById(id);
  if (!el) return;
  // The nodes are SVG <g> elements, whose `className` is a read-only
  // SVGAnimatedString; assigning a string to it has no effect, so set the
  // attribute instead.
  el.setAttribute('class', `ag-node ag-node-${status}`);
  if (agent === 'rewriter') el.style.opacity = status === 'idle' ? '.5' : '1';
}
635
+
636
+ // ── HUMAN REVIEW ─────────────────────────────────────────────────────────
637
// Fetch the answers awaiting human review from /api/review and render one
// card per item with Approve / Reject buttons. Failures are ignored.
async function loadReview() {
  try {
    const resp = await fetch('/api/review');
    const payload = await resp.json();
    const list = document.getElementById('review-list');
    const pending = payload.pending;
    if (!pending || !pending.length) {
      list.innerHTML = '<p style="color:var(--muted);font-size:.84rem">No answers pending review. The Critic agent will route flagged answers here.</p>';
      return;
    }
    const cards = [];
    for (const item of pending) {
      cards.push(`
<div class="review-card" id="rc-${item.query_id}">
<div class="review-card-q">❓ ${esc(item.question)}</div>
<div class="review-card-a">${esc(item.generation)}</div>
<div class="review-card-c">⚠️ Critic flag: ${esc(item.critique || 'Quality concerns detected.')}</div>
<div class="review-actions">
<button class="btn btn-green btn-sm" onclick="doReview('${item.query_id}','approve')"><i class="fas fa-check"></i> Approve</button>
<button class="btn btn-red btn-sm" onclick="doReview('${item.query_id}','reject')"><i class="fas fa-times"></i> Reject</button>
</div>
</div>`);
    }
    list.innerHTML = cards.join('');
  } catch (e) {}
}
656
+
657
// Submit a review decision for one flagged answer and remove its card.
//   qid:    query id of the flagged answer.
//   action: 'approve' or 'reject'.
async function doReview(qid, action) {
  try {
    const r = await fetch('/api/review/'+qid, {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({action})});
    // fetch() resolves on HTTP errors too; keep the card so the reviewer can
    // retry when the server did not accept the decision.
    if (!r.ok) return;
    document.getElementById('rc-'+qid)?.remove();
    refreshStats();
    if (!document.querySelector('.review-card')) {
      document.getElementById('review-list').innerHTML = '<p style="color:var(--muted);font-size:.84rem">All reviews complete.</p>';
    }
  } catch (e) {
    // Network failure: leave the card in place.
  }
}
667
+
668
+ // ── OBSERVABILITY ─────────────────────────────────────────────────────────
669
// Fetch /api/observability, fill the four summary tiles, and redraw both
// charts. Failures are ignored.
async function loadObs() {
  try {
    const resp = await fetch('/api/observability');
    const d = await resp.json();

    const setText = (id, value) => { document.getElementById(id).textContent = value; };
    const asMs = value => (value ? value + 'ms' : '—');

    setText('obs-total', d.total_calls);
    setText('obs-queries', d.total_queries);
    const lat = d.avg_latency_ms || {};
    setText('obs-avg-plan', asMs(lat.planner));
    setText('obs-avg-gen', asMs(lat.generator));
    renderObsCharts(d);
  } catch (e) {}
}
680
+
681
// Chart theming helpers. Any theme value other than 'light' counts as dark.
function isDark() {
  const theme = document.documentElement.getAttribute('data-theme');
  return theme !== 'light';
}

// Grid-line colour for the current theme.
function gc() {
  if (isDark()) return 'rgba(255,255,255,.05)';
  return 'rgba(0,0,0,.06)';
}

// Tick/label text colour for the current theme.
function tc() {
  if (isDark()) return '#8892a4';
  return '#4b5675';
}

// Tooltip style options for the current theme.
function tt() {
  const dark = isDark();
  const style = { borderWidth: 1 };
  style.backgroundColor = dark ? 'rgba(7,13,31,.95)' : 'rgba(255,255,255,.97)';
  style.titleColor = dark ? '#e2e8f0' : '#0f172a';
  style.bodyColor = dark ? '#8892a4' : '#4b5675';
  style.borderColor = dark ? 'rgba(79,142,247,.3)' : 'rgba(37,99,235,.2)';
  return style;
}
693
+
694
// Redraw the two bar charts (calls per agent, average latency per agent)
// from an /api/observability payload, replacing any existing charts.
function renderObsCharts(d) {
  const agents = ['planner','retriever','grader','rewriter','generator','critic'];
  const calls = agents.map(a => (d.agent_calls||{})[a] || 0);
  const lats = agents.map(a => (d.avg_latency_ms||{})[a] || 0);
  const colors = ['rgba(79,142,247,.7)','rgba(6,182,212,.7)','rgba(245,158,11,.7)','rgba(239,68,68,.65)','rgba(167,139,250,.7)','rgba(239,68,68,.5)'];
  const t = tc(), g = gc(), tip = tt();

  // Both charts share one layout; only the series and axis title differ.
  const barConfig = (label, data, yTitle) => ({
    type: 'bar',
    data: { labels: agents, datasets: [{ label: label, data: data, backgroundColor: colors, borderRadius: 6 }] },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { labels: { color: t } }, tooltip: tip },
      scales: {
        x: { ticks: { color: t }, grid: { color: g } },
        y: {
          ticks: { color: t },
          grid: { color: g },
          beginAtZero: true,
          title: { display: true, text: yTitle, color: t, font: { size: 11 } }
        }
      }
    }
  });

  if (obsChart) obsChart.destroy();
  obsChart = new Chart(document.getElementById('obs-chart'), barConfig('Total Calls', calls, 'Calls'));

  if (latChart) latChart.destroy();
  latChart = new Chart(document.getElementById('lat-chart'), barConfig('Avg Latency (ms)', lats, 'ms'));
}
712
+
713
+ // ── TOOLS ─────────────────────────────────────────────────────────────────
714
// Run one playground tool through /api/tool/<name> and show its output.
//   name:  tool key ('web_search', 'calculator', 'code').
//   inpId: id of the textarea holding the input.
//   outId: id of the element that receives the output.
//   btn:   the clicked button; disabled and shown as a spinner while running.
async function runTool(name, inpId, outId, btn) {
  const inp = document.getElementById(inpId).value.trim();
  if (!inp) return;
  btn.disabled = true; btn.innerHTML = '<div class="spinner"></div>';
  const outEl = document.getElementById(outId);
  outEl.style.display = 'block'; outEl.textContent = 'Running…';
  try {
    const r = await fetch('/api/tool/'+name, {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({input:inp})});
    const d = await r.json();
    outEl.textContent = d.error ? '❌ ' + d.error : d.result;
    outEl.classList.toggle('lit', !d.error);
  } catch (e) {
    outEl.textContent = '❌ ' + e.message;
    // Clear the success highlight a previous run may have left behind.
    outEl.classList.remove('lit');
  } finally {
    // Always restore the button, whatever happened above.
    btn.disabled = false;
    const icons = {web_search:'<i class="fas fa-search"></i> Search', calculator:'<i class="fas fa-equals"></i> Calculate', code:'<i class="fas fa-play"></i> Run'};
    btn.innerHTML = icons[name] || 'Run';
  }
}
730
+
731
+ // ── UTILS ─────────────────────────────────────────────────────────────────
732
// Escape a value for insertion into HTML and turn newlines into <br>.
// null/undefined become ''; other values are stringified, so a numeric 0 is
// rendered as "0" rather than dropped. Quotes are escaped as well so the
// result is also safe inside a quoted attribute value.
function esc(s) {
  if (s === null || s === undefined) return '';
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/\n/g, '<br>');
}
733
// Show `msg` as an error box inside the element with the given id.
// Messages come from server responses and exception text, so they are
// escaped before being placed in markup.
function showErr(id, msg) {
  document.getElementById(id).innerHTML = `<div class="err-box"><i class="fas fa-exclamation-triangle"></i><div>${esc(msg)}</div></div>`;
}

// Remove any error box from the element with the given id.
function clearErr(id) { document.getElementById(id).innerHTML = ''; }
735
// Copy the visible text of element `id` to the clipboard and briefly show a
// "Copied!" confirmation on the button.
function copyText(id, btn) {
  const showCopied = () => { btn.innerHTML = '<i class="fas fa-check"></i> Copied!'; };
  const showDefault = () => { btn.innerHTML = '<i class="fas fa-copy"></i> Copy'; };
  navigator.clipboard.writeText(document.getElementById(id).innerText).then(function () {
    showCopied();
    setTimeout(showDefault, 1800);
  });
}
741
+ </script>
742
+ </body>
743
+ </html>
tools/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # (empty)
tools/calculator.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast, math, operator, re
2
+
3
# Upper bound on the bit length of an integer power result. Without it an
# expression such as ``9**9**9**9`` keeps the worker busy (or exhausts memory)
# long before any result can be returned.
_MAX_POW_RESULT_BITS = 1_000_000


def _bounded_pow(base, exponent):
    """Return ``base ** exponent``, refusing integer powers that would be huge.

    Only int ** positive-int is checked: float powers overflow quickly with an
    ``OverflowError`` and negative exponents produce floats.

    Raises:
        ValueError: if the estimated result exceeds ``_MAX_POW_RESULT_BITS``.
    """
    if (
        isinstance(base, int)
        and isinstance(exponent, int)
        and exponent > 0
        and abs(base) > 1
        and base.bit_length() * exponent > _MAX_POW_RESULT_BITS
    ):
        raise ValueError("Exponent too large")
    return operator.pow(base, exponent)


# AST operator node type -> implementation. Anything not listed is rejected.
_SAFE_OPS = {
    ast.Add: operator.add, ast.Sub: operator.sub,
    ast.Mult: operator.mul, ast.Div: operator.truediv,
    ast.Pow: _bounded_pow, ast.USub: operator.neg,
    ast.UAdd: operator.pos,
    ast.Mod: operator.mod, ast.FloorDiv: operator.floordiv,
}

# Every public name of the math module plus a few numeric builtins.
# NOTE: functions reachable through this table (e.g. factorial) are not
# size-bounded; only the ``**`` operator is guarded.
_SAFE_NAMES = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
_SAFE_NAMES.update({"abs": abs, "round": round, "int": int, "float": float})


def _safe_eval(node):
    """Recursively evaluate a whitelisted arithmetic AST node.

    Supported nodes: numeric constants, binary and unary operators listed in
    ``_SAFE_OPS``, names found in ``_SAFE_NAMES`` and positional-argument calls
    to those names.

    Args:
        node: an ``ast`` expression node (typically ``ast.parse(...).body``).

    Returns:
        The computed value.

    Raises:
        ValueError: for any node, operator or constant outside the whitelist.
    """
    if isinstance(node, ast.Constant):
        # Only numbers are meaningful here; this rejects None, Ellipsis,
        # strings and bytes that ast.Constant can also carry.
        if isinstance(node.value, (int, float, complex)):
            return node.value
        raise ValueError(f"Unsupported constant: {node.value!r}")
    if isinstance(node, (ast.BinOp, ast.UnaryOp)):
        op = _SAFE_OPS.get(type(node.op))
        if op is None:
            raise ValueError(f"Unsupported operator: {type(node.op).__name__}")
        if isinstance(node, ast.BinOp):
            return op(_safe_eval(node.left), _safe_eval(node.right))
        return op(_safe_eval(node.operand))
    if isinstance(node, ast.Call) and not node.keywords:
        # Only direct calls to whitelisted names; attribute access, lambdas
        # and keyword arguments fall through to the rejection below.
        func = node.func.id if isinstance(node.func, ast.Name) else None
        if func in _SAFE_NAMES:
            return _SAFE_NAMES[func](*[_safe_eval(a) for a in node.args])
    if isinstance(node, ast.Name) and node.id in _SAFE_NAMES:
        return _SAFE_NAMES[node.id]
    raise ValueError(f"Unsafe expression: {ast.dump(node)}")


def calculate(expr: str) -> str:
    """Evaluate an arithmetic expression and return the outcome as text.

    Characters outside the calculator alphabet are dropped first, ``^`` is
    treated as exponentiation, and the remainder is evaluated through
    ``_safe_eval`` (never ``eval``).

    Args:
        expr: user-supplied expression, e.g. ``"sqrt(16) + 2^3"``.

    Returns:
        ``"Result: <value>"`` on success, otherwise
        ``"Calculation error: <reason>"``. This function never raises.
    """
    try:
        cleaned = re.sub(r"[^0-9+\-*/().,%^ \t\na-zA-Z_]", "", expr).strip()
        # The caret is whitelisted above but parses as bitwise XOR, which the
        # evaluator rejects; calculator users mean "to the power of".
        cleaned = cleaned.replace("^", "**")
        tree = ast.parse(cleaned, mode="eval")
        val = _safe_eval(tree.body)
        return f"Result: {val}"
    except Exception as exc:  # tool boundary: report every failure as text
        return f"Calculation error: {exc}"
tools/code_tool.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, contextlib
2
+
3
# ``__builtins__`` is the ``builtins`` module in ``__main__`` but a plain dict in
# every imported module; ``vars()`` on that dict raises TypeError, so normalise
# to a mapping before filtering.
_BUILTINS_NS = __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)

# The only builtins visible to executed snippets.
_SAFE_BUILTINS = {
    k: v for k, v in _BUILTINS_NS.items()
    if k in {"print","range","len","sum","max","min","abs","round","sorted",
             "list","dict","set","tuple","str","int","float","bool","enumerate",
             "zip","map","filter","isinstance","type","repr","chr","ord"}
}


def run_code(code: str) -> str:
    """Execute a Python snippet with restricted builtins and return its stdout.

    SECURITY: a trimmed ``__builtins__`` is NOT a sandbox. Object introspection
    can still reach arbitrary classes, and nothing limits CPU time or memory.
    Do not expose this to untrusted users without real isolation (separate
    process, resource limits, container).

    Args:
        code: Python source to execute.

    Returns:
        The stripped captured stdout, a success notice when nothing was
        printed, or ``"❌ Error: <message>"`` if the snippet raised.
    """
    buf = io.StringIO()
    # One dict serves as both globals and locals: with two separate mappings
    # exec() scopes the snippet like a class body, so functions and
    # comprehensions cannot see names defined at the snippet's top level.
    # The builtins are copied so a snippet cannot alter them for later runs.
    namespace = {"__builtins__": dict(_SAFE_BUILTINS)}
    try:
        with contextlib.redirect_stdout(buf):
            exec(code, namespace)
    except Exception as exc:  # tool boundary: report failures as text
        return f"❌ Error: {exc}"
    out = buf.getvalue().strip()
    return out if out else "✅ Code executed successfully (no output)."
tools/web_search.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from duckduckgo_search import DDGS
2
+
3
def web_search(query: str, max_results: int = 4) -> str:
    """Run a DuckDuckGo text search and format the hits as plain text.

    Args:
        query: search phrase passed to ``DDGS.text``.
        max_results: maximum number of hits requested.

    Returns:
        One ``Title / Snippet / URL`` paragraph per hit joined by newlines,
        ``"No results found."`` when the search yields nothing, or
        ``"Search error: <message>"`` if anything raises along the way.
    """
    try:
        with DDGS() as client:
            results = list(client.text(query, max_results=max_results))
        if len(results) == 0:
            return "No results found."
        # Missing fields degrade to empty strings rather than failing.
        formatted = [
            f"Title: {hit.get('title','')}\nSnippet: {hit.get('body','')}\nURL: {hit.get('href','')}\n"
            for hit in results
        ]
        return "\n".join(formatted)
    except Exception as exc:
        return f"Search error: {exc}"
tracing/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # (empty)
tracing/tracer.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ from datetime import datetime
3
+
4
+
5
class Tracer:
    """In-memory recorder of per-query agent steps and aggregate statistics.

    All reads and writes of the internal state happen under a single
    ``threading.Lock``. Nothing is ever evicted, so memory grows with the
    number of recorded steps.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._traces = {}  # query_id → [step, ...]
        # Aggregates across all queries, keyed by agent name where applicable.
        self._global = {"agent_calls": {}, "latencies": {}, "total_calls": 0}

    def add(self, query_id: str, agent: str, message: str, status: str, latency_ms: int):
        """Append one step to a query's trace and update the aggregates.

        Args:
            query_id: identifier of the query the step belongs to.
            agent: name of the agent that produced the step.
            message: free-text description stored with the step.
            status: status label stored with the step.
            latency_ms: step duration; only values greater than zero are
                included in the per-agent latency average.
        """
        # Function-scope import keeps this block independent of the module's
        # import line, which only brings in ``datetime``.
        from datetime import timezone

        step = {
            "agent": agent,
            "message": message,
            "status": status,
            "latency_ms": latency_ms,
            # Aware UTC clock; ``datetime.utcnow()`` is deprecated and this
            # yields the same HH:MM:SS string.
            "ts": datetime.now(timezone.utc).strftime("%H:%M:%S"),
        }
        with self._lock:
            self._traces.setdefault(query_id, []).append(step)
            calls = self._global["agent_calls"]
            calls[agent] = calls.get(agent, 0) + 1
            # Create the latency bucket even when nothing is appended so the
            # agent still shows up in stats() with an average of 0.
            bucket = self._global["latencies"].setdefault(agent, [])
            if latency_ms > 0:
                bucket.append(latency_ms)
            self._global["total_calls"] += 1

    def get(self, query_id: str) -> list:
        """Return a copy of the step list for ``query_id`` (empty if unknown).

        The list is new, but the step dicts inside it are the stored objects.
        """
        with self._lock:
            return list(self._traces.get(query_id, []))

    def stats(self) -> dict:
        """Return aggregate counters across every recorded step.

        Returns:
            A dict with ``agent_calls`` (steps per agent), ``avg_latency_ms``
            (rounded mean of positive latencies per agent, 0 when none),
            ``total_calls`` and ``total_queries``.
        """
        with self._lock:
            avg_lat = {
                agent: round(sum(v) / len(v)) if v else 0
                for agent, v in self._global["latencies"].items()
            }
            return {
                "agent_calls": dict(self._global["agent_calls"]),
                "avg_latency_ms": avg_lat,
                "total_calls": self._global["total_calls"],
                "total_queries": len(self._traces),
            }
+ }