Spaces:

NoobNovel
/

AdaptiveRAG

Sleeping

App Files Files Community

AdaptiveRAG / app.py

NoobNovel

Add Knowledge Base Versioning layer with full UI tab

66bd94a about 1 month ago

raw

history blame contribute delete

38.9 kB

	"""AdaptiveRAG — under-the-hood pipeline visualizer.

	Run: streamlit run app.py
	"""
	from __future__ import annotations

	import json
	import logging
	import os
	import tempfile
	import time
	from pathlib import Path

	os.environ.setdefault("ANONYMIZED_TELEMETRY", "False")

	# suppress harmless noise from Streamlit's torch inspector + ChromaDB posthog client
	logging.getLogger("streamlit.watcher.local_sources_watcher").setLevel(logging.ERROR)
	logging.getLogger("chromadb.telemetry.product.posthog").setLevel(logging.ERROR)

	import numpy as np
	import pandas as pd
	import streamlit as st

	from agent.critic import critique, refine_query
	from agent.planner import plan
	from agent.router import route
	from agent.tools import image_retrieve_and_reason
	from config import AGENT_CONFIG, EMBEDDING_CONFIG, HOSTED, LLM_CONFIG, PATHS, RETRIEVAL_CONFIG
	from ingestion.embedder import embed_query
	from ingestion.indexer import fetch_embeddings
	from llm.client_factory import get_llm
	from retrieval.dense import Hit, dense_search
	from retrieval.hybrid import reciprocal_rank_fusion
	from retrieval.reranker import rerank
	from retrieval.sparse import sparse_search

	st.set_page_config(page_title="AdaptiveRAG — Underhood", page_icon="🔬", layout="wide")

	# ── LLM backend check — shown before anything else ──────────────────
	if not os.environ.get("GROQ_API_KEY"):
	# Running without Groq — check if Ollama is reachable locally
	try:
	import requests as _req
	_req.get("http://localhost:11434/api/tags", timeout=2).raise_for_status()
	_ollama_ok = True
	except Exception:
	_ollama_ok = False
	if not _ollama_ok:
	st.error(
	"No LLM backend found.\n\n"
	"- Running on Hugging Face? Add your `GROQ_API_KEY` secret in "
	"Space Settings → Variables and secrets. Get a free key at "
	"[console.groq.com](https://console.groq.com).\n"
	"- Running locally? Start Ollama: `ollama serve`"
	)
	st.stop()

	# ───────────────────────────── styling ──────────────────────────────
	st.markdown(
	"""
	<style>
	.phase-card {
	border-left: 4px solid var(--accent, #4f8cff);
	padding: .6rem 1rem;
	margin: .25rem 0 .5rem 0;
	background: rgba(79,140,255,0.06);
	border-radius: 6px;
	}
	.phase-num { color: #4f8cff; font-weight: 700; margin-right: .4rem; }
	.pill { display: inline-block; padding: .15rem .55rem; border-radius: 999px;
	font-size: .78rem; font-weight: 600; margin-right: .4rem; }
	.pill-blue { background: #1e3a5f; color: #9ec5ff; }
	.pill-green { background: #1e4f30; color: #a3e6b5; }
	.pill-purple { background: #3d2a5e; color: #c8a8f5; }
	.pill-amber { background: #5e3f0e; color: #f3c97a; }
	.pill-red { background: #5a1f1f; color: #f3a3a3; }
	.pill-grey { background: #2c2c33; color: #b8b8c0; }
	.chunk-card {
	background: rgba(255,255,255,0.03);
	border: 1px solid rgba(255,255,255,0.08);
	border-radius: 6px; padding: .55rem .7rem; margin-bottom: .4rem;
	font-size: .82rem;
	}
	.chunk-meta { color: #9aa3b2; font-size: .73rem; margin-bottom: .25rem; }
	.mini-vec {
	font-family: 'SF Mono', Menlo, monospace; font-size: .68rem;
	color: #8b949e; word-break: break-all;
	}
	</style>
	""",
	unsafe_allow_html=True,
	)


	# ───────────────────────────── helpers ──────────────────────────────
	@st.cache_resource
	def _llm():
	return get_llm()


	def _load_manifest() -> dict:
	p = PATHS["manifest_path"]
	return json.loads(p.read_text()) if p.exists() else {}


	def phase_header(num: int, title: str, subtitle: str = "") -> None:
	st.markdown(
	f"<div class='phase-card'><span class='phase-num'>STEP {num}</span>"
	f"<b>{title}</b><br><span style='color:#9aa3b2;font-size:.85rem;'>{subtitle}</span>"
	f"</div>",
	unsafe_allow_html=True,
	)


	def hits_to_df(hits: list[Hit], score_label: str = "score") -> pd.DataFrame:
	rows = []
	for h in hits:
	title = h.metadata.get("title") or h.metadata.get("source_path", "?")
	short = title.split(" (")[0]
	if len(short) > 38:
	short = short[:35] + "…"
	label = f"{short} · p{h.metadata.get('page_start')} · {h.chunk_id.split('::')[-1]}"
	rows.append({"chunk": label, score_label: float(h.score), "chunk_id": h.chunk_id})
	return pd.DataFrame(rows)


	def render_hits(hits: list[Hit], badge_class: str, label: str, max_chars: int = 220) -> None:
	if not hits:
	st.caption(f"_(no {label.lower()} hits)_")
	return
	for i, h in enumerate(hits, start=1):
	meta = h.metadata
	snippet = h.text[:max_chars].replace("\n", " ")
	if len(h.text) > max_chars:
	snippet += "…"
	st.markdown(
	f"<div class='chunk-card'>"
	f"<div class='chunk-meta'>"
	f"<span class='pill {badge_class}'>{label} #{i}</span>"
	f"score <b>{h.score:.3f}</b> · "
	f"{meta.get('title','?')} · p.{meta.get('page_start')}–{meta.get('page_end')} · "
	f"<code>{h.chunk_id}</code>"
	f"</div>{snippet}</div>",
	unsafe_allow_html=True,
	)


	def pca_2d(matrix: np.ndarray) -> np.ndarray:
	centered = matrix - matrix.mean(axis=0, keepdims=True)
	_, _, vt = np.linalg.svd(centered, full_matrices=False)
	return centered @ vt[:2].T


	def vector_space_plot(query_vec: list[float], fused_hits: list[Hit],
	dense_ids: set[str], sparse_ids: set[str],
	kept_ids: set[str]) -> None:
	if not fused_hits:
	st.caption("_(nothing to plot)_")
	return
	embs = fetch_embeddings([h.chunk_id for h in fused_hits])
	rows = []
	vecs = [np.array(query_vec, dtype=np.float32)]
	for h in fused_hits:
	v = embs.get(h.chunk_id)
	if v is None:
	continue
	vecs.append(np.array(v, dtype=np.float32))
	in_d, in_s = h.chunk_id in dense_ids, h.chunk_id in sparse_ids
	in_keep = h.chunk_id in kept_ids
	if in_d and in_s:
	color = "fused (both)"
	elif in_d:
	color = "dense only"
	elif in_s:
	color = "sparse only"
	else:
	color = "other"
	title = (h.metadata.get("title") or "?").split(" (")[0][:40]
	label = f"{title} · p{h.metadata.get('page_start')}"
	rows.append({"label": label, "color": color, "size": 90 if in_keep else 50})
	if len(vecs) < 3:
	st.caption("_(need at least 2 hits for a 2D projection)_")
	return
	proj = pca_2d(np.vstack(vecs))
	df = pd.DataFrame(
	[{"x": proj[0, 0], "y": proj[0, 1], "label": "🔎 your question",
	"color": "QUERY", "size": 220}]
	+ [{"x": proj[i + 1, 0], "y": proj[i + 1, 1], **rows[i]}
	for i in range(len(rows))]
	)
	st.scatter_chart(
	df, x="x", y="y", color="color", size="size",
	height=380, use_container_width=True,
	)
	st.caption(
	"PCA projection of the query embedding + fused hit embeddings. "
	"Larger points survived cross-encoder reranking."
	)


	def render_embedding_card(query: str, qv: list[float], dt: float) -> None:
	arr = np.array(qv, dtype=np.float32)
	cols = st.columns([1, 1, 1, 3])
	cols[0].metric("Model", EMBEDDING_CONFIG["model"].split("/")[-1])
	cols[1].metric("Dimensions", len(qv))
	cols[2].metric("L2 norm", f"{float(np.linalg.norm(arr)):.3f}")
	cols[3].metric("Embed time", f"{dt*1000:.0f} ms")
	st.caption(f"Question ({len(query)} chars, ~{len(query.split())} words):")
	st.code(query, language="text")
	st.caption("First 32 dimensions of the embedding vector:")
	st.bar_chart(pd.DataFrame({"value": arr[:32]}), height=140, use_container_width=True)
	preview = ", ".join(f"{x:+.3f}" for x in arr[:8]) + ", …"
	st.markdown(f"<span class='mini-vec'>vector[0:8] = [{preview}]</span>",
	unsafe_allow_html=True)


	# ───────────────────────────── pipeline view ──────────────────────────────
	def _render_healing_trace(healing_trace: list[dict], health_score: float) -> None:
	"""Render the healing trace panel below the answer."""
	# Health score metric with colour
	if health_score >= 80:
	color, label = "#2ecc71", "Healthy"
	elif health_score >= 60:
	color, label = "#f39c12", "Fair"
	else:
	color, label = "#e74c3c", "Needs healing"
	st.markdown(
	f"<div style='display:flex;align-items:center;gap:1rem;margin:.5rem 0;'>"
	f"<span style='font-size:1.1rem;font-weight:700;color:{color};'>"
	f"⚕️ Health score: {health_score:.0f} / 100</span>"
	f"<span class='pill' style='background:{color}22;color:{color};border:1px solid {color}44;'>"
	f"{label}</span></div>",
	unsafe_allow_html=True,
	)
	if not healing_trace:
	st.success("✅ Answer passed all checks on first attempt — no healing needed.")
	return
	for attempt in healing_trace:
	num = attempt.get("attempt", "?")
	healthy = attempt.get("healthy", False)
	icon = "✅" if healthy else "🔧"
	issues = attempt.get("issues", [])
	label_str = "Healthy" if healthy else f"Issues: {', '.join(issues)}"
	with st.expander(f"{icon} Attempt {num} — {label_str}", expanded=not healthy):
	if healthy:
	st.success("All checks passed — answer accepted.")
	continue
	c1, c2, c3 = st.columns(3)
	c1.metric("Hallucination", "⚠️ yes" if "hallucination" in issues else "✅ no")
	c2.metric("Low chunk quality", "⚠️ yes" if "low_chunk_quality" in issues else "✅ no")
	c3.metric("Knowledge gap", "⚠️ yes" if "knowledge_gap" in issues else "✅ no")
	for action in attempt.get("actions", []):
	atype = action.get("type", "")
	detail = action.get("detail", "")
	icons = {
	"hallucination_fix": "🔍",
	"chunk_expansion": "📎",
	"web_search": "🌐",
	"query_rewrite": "✏️",
	"regenerate": "🔄",
	}
	prefix = icons.get(atype, "•")
	st.markdown(
	f"<div class='chunk-card'><span class='pill pill-grey'>{atype}</span>"
	f"{prefix} {detail}</div>",
	unsafe_allow_html=True,
	)
	if atype == "hallucination_fix":
	for s in action.get("flagged_sentences", []):
	st.caption(f" ↳ Flagged: \"{s[:120]}…\"")


	def visual_pipeline(query: str, enable_healing: bool = True) -> None:
	llm = _llm()

	# ── Step 1: embed the question ────────────────────────────────
	phase_header(1, "Question encoding",
	"Convert text → 384-dim dense vector via sentence-transformers (MiniLM-L6).")
	t0 = time.time()
	qv = embed_query(query)
	render_embedding_card(query, qv, time.time() - t0)

	# ── Step 2: Self-RAG router ────────────────────────────────
	phase_header(2, "Self-RAG router",
	"Decide whether to RETRIEVE, ANSWER_DIRECTLY, or CLARIFY before touching the index.")
	t0 = time.time()
	decision = route(query, llm=llm)
	dt = time.time() - t0
	pill_map = {"RETRIEVE": "pill-blue", "ANSWER_DIRECTLY": "pill-green", "CLARIFY": "pill-amber"}
	pill = pill_map.get(decision["action"], "pill-grey")
	st.markdown(
	f"<span class='pill {pill}'>{decision['action']}</span>"
	f"<span style='color:#9aa3b2;'>{decision.get('reason','')}</span>"
	f"<span style='float:right;color:#9aa3b2;font-size:.78rem;'>"
	f"router latency: {dt*1000:.0f} ms</span>",
	unsafe_allow_html=True,
	)

	if decision["action"] == "ANSWER_DIRECTLY":
	st.markdown("### Direct answer (no retrieval)")
	ans = llm.generate(prompt=query,
	system="You are a helpful research assistant. Be concise.",
	temperature=0.2)
	st.markdown(ans)
	return
	if decision["action"] == "CLARIFY":
	st.markdown("### Clarifying question")
	ans = llm.generate(
	prompt=("The user asked: " + query +
	"\n\nIt is too ambiguous to answer well. Ask one short clarifying question."),
	system="You are a helpful research assistant.",
	temperature=0.2,
	)
	st.markdown(ans)
	return

	# ── Iterations of plan → retrieve → answer → critique ────────
	accumulated: list[Hit] = []
	current_query = query

	for it in range(AGENT_CONFIG["max_iterations"]):
	st.markdown(f"---\n## 🔁 Iteration {it + 1}")
	if current_query != query:
	st.info(f"Refined query → {current_query}")

	# ── Step 3: plan ─────────────────────────────────────
	phase_header(3, "Planner", "LLM decomposes the question into focused sub-queries.")
	prior = ""
	if accumulated:
	titles = sorted({h.metadata.get("title", "?") for h in accumulated})
	prior = "Already gathered passages from: " + ", ".join(titles)
	t0 = time.time()
	steps = plan(current_query, prior_summary=prior, llm=llm)
	dt = time.time() - t0
	st.caption(f"Generated {len(steps)} sub-quer{'y' if len(steps)==1 else 'ies'} in {dt*1000:.0f} ms")
	for i, s in enumerate(steps, start=1):
	st.markdown(
	f"<div class='chunk-card'>"
	f"<span class='pill pill-purple'>sub-query {i}</span>"
	f"<b>{s['query']}</b>"
	f"<div class='chunk-meta' style='margin-top:.3rem;'>"
	f"rationale: {s.get('rationale','—')}</div></div>",
	unsafe_allow_html=True,
	)

	# ── Step 4: retrieval per sub-query ──────────────────
	phase_header(
	4,
	"Hybrid retrieval per sub-query",
	f"Dense (Chroma cosine, k={RETRIEVAL_CONFIG['dense_k']}) ∥ "
	f"Sparse (BM25, k={RETRIEVAL_CONFIG['sparse_k']}) → "
	f"Reciprocal Rank Fusion → Cross-encoder rerank "
	f"(BGE, top {RETRIEVAL_CONFIG['rerank_top_n']}).",
	)

	for si, step in enumerate(steps, start=1):
	with st.expander(f"Sub-query {si}: {step['query']}", expanded=(si == 1)):
	t0 = time.time()
	dense_hits = dense_search(step["query"])
	t_dense = time.time() - t0
	t0 = time.time()
	sparse_hits = sparse_search(step["query"])
	t_sparse = time.time() - t0
	t0 = time.time()
	fused = reciprocal_rank_fusion([dense_hits, sparse_hits],
	top_k=max(RETRIEVAL_CONFIG["dense_k"],
	RETRIEVAL_CONFIG["sparse_k"]))
	t_fuse = time.time() - t0
	t0 = time.time()
	reranked = rerank(step["query"], fused)
	t_rerank = time.time() - t0

	m1, m2, m3, m4 = st.columns(4)
	m1.metric("Dense hits", len(dense_hits), f"{t_dense*1000:.0f} ms")
	m2.metric("Sparse hits", len(sparse_hits), f"{t_sparse*1000:.0f} ms")
	m3.metric("After RRF", len(fused), f"{t_fuse*1000:.0f} ms")
	m4.metric("After rerank", len(reranked), f"{t_rerank*1000:.0f} ms")

	tabs = st.tabs([
	"🔵 Dense (vectors)",
	"🟢 Sparse (BM25)",
	"🟣 RRF fusion",
	"🟡 Cross-encoder rerank",
	"🗺️ Vector space",
	])
	with tabs[0]:
	st.caption("Top-K nearest neighbors by cosine similarity.")
	if dense_hits:
	st.bar_chart(hits_to_df(dense_hits, "cosine_sim"),
	x="chunk", y="cosine_sim",
	height=260, use_container_width=True)
	render_hits(dense_hits[:5], "pill-blue", "DENSE")

	with tabs[1]:
	st.caption("Top-K BM25 keyword matches (normalized).")
	if sparse_hits:
	st.bar_chart(hits_to_df(sparse_hits, "bm25_norm"),
	x="chunk", y="bm25_norm",
	height=260, use_container_width=True)
	render_hits(sparse_hits[:5], "pill-green", "BM25")

	with tabs[2]:
	st.caption(
	"Reciprocal Rank Fusion: score(d) = Σ 1/(k + rank). "
	"Combines dense + sparse rankings into one merged list."
	)
	if fused:
	st.bar_chart(hits_to_df(fused[:12], "rrf_score"),
	x="chunk", y="rrf_score",
	height=280, use_container_width=True)
	render_hits(fused[:5], "pill-purple", "FUSED")

	with tabs[3]:
	st.caption(
	"Cross-encoder scores (query, chunk) jointly — much more "
	"accurate than bi-encoder cosine, but slower → only run on "
	"the fused candidate set."
	)
	if reranked:
	st.bar_chart(hits_to_df(reranked, "ce_score"),
	x="chunk", y="ce_score",
	height=240, use_container_width=True)
	render_hits(reranked, "pill-amber", "RERANKED")

	with tabs[4]:
	dense_ids = {h.chunk_id for h in dense_hits}
	sparse_ids = {h.chunk_id for h in sparse_hits}
	kept_ids = {h.chunk_id for h in reranked}
	vector_space_plot(qv, fused[:20], dense_ids, sparse_ids, kept_ids)

	accumulated.extend(reranked)

	# ── Step 5: answer ─────────────────────────────────────
	# Dedupe + cap to 8 passages for the final prompt
	seen: set[str] = set()
	unique: list[Hit] = []
	for h in accumulated:
	if h.chunk_id in seen:
	continue
	seen.add(h.chunk_id)
	unique.append(h)
	if len(unique) >= 8:
	break
	context_lines, citations = [], []
	for i, h in enumerate(unique, start=1):
	meta = h.metadata
	head = (f"[{i}] {meta.get('title','?')} "
	f"(p.{meta.get('page_start')}-{meta.get('page_end')})")
	context_lines.append(f"{head}\n{h.text}")
	citations.append({
	"n": i, "chunk_id": h.chunk_id,
	"title": meta.get("title"),
	"source_path": meta.get("source_path"),
	"page_start": meta.get("page_start"),
	"page_end": meta.get("page_end"),
	"score": float(h.score),
	})
	context_block = "\n\n".join(context_lines)

	phase_header(5, "Context assembly + answer generation",
	f"Top {len(unique)} unique passages → {LLM_CONFIG['model']} via {LLM_CONFIG['provider']}.")
	with st.expander("📦 Context handed to the LLM", expanded=False):
	for c in citations:
	st.markdown(
	f"[{c['n']}] {c['title']} · pages {c['page_start']}–{c['page_end']} · "
	f"score `{c['score']:.3f}`"
	)
	st.code(context_block[:3000] + ("…" if len(context_block) > 3000 else ""),
	language="text")

	t0 = time.time()
	ANSWER_SYSTEM = (
	"You are a careful research assistant. Use ONLY the provided passages to "
	"answer the question. Cite sources inline with [N] where N is the passage "
	"number. If the passages are insufficient, say so explicitly."
	)
	ANSWER_PROMPT = (
	f"Question: {query}\n\nPassages:\n{context_block}\n\n"
	"Write a concise, well-grounded answer. Use inline citations like [1], [2] "
	"that match the passage numbers above."
	)
	answer = llm.generate(prompt=ANSWER_PROMPT, system=ANSWER_SYSTEM, temperature=0.1)
	st.caption(f"LLM generation: {time.time()-t0:.1f} s")
	st.markdown("### Answer")
	st.markdown(answer)

	st.markdown("### Citations")
	for c in citations:
	st.markdown(
	f"[{c['n']}] {c['title']} — pages {c['page_start']}–{c['page_end']} "
	f"· score `{c['score']:.3f}` · `{Path(c['source_path']).name}`"
	)

	# ── Step 6: critic ─────────────────────────────────────
	phase_header(6, "Self-critique",
	"LLM scores its own answer for grounding + completeness.")
	t0 = time.time()
	crit = critique(query, answer, context_block, llm=llm)
	c1, c2, c3 = st.columns(3)
	c1.metric("Grounded", "✅ yes" if crit["grounded"] else "⚠️ no")
	c2.metric("Complete", "✅ yes" if crit["complete"] else "⚠️ no")
	c3.metric("Confidence", f"{crit['confidence']:.2f}",
	delta=f"threshold {AGENT_CONFIG['confidence_threshold']:.2f}")
	if crit.get("missing"):
	st.warning(f"Missing: {crit['missing']}")
	st.caption(f"Critique latency: {time.time()-t0:.1f} s")

	if crit["confidence"] >= AGENT_CONFIG["confidence_threshold"] and crit["grounded"]:
	st.success(f"✓ Confidence {crit['confidence']:.2f} ≥ threshold — answer accepted.")
	if enable_healing:
	phase_header(
	7, "Self-Healing layer",
	"Hallucination detection → chunk quality scoring → knowledge gap → regenerate if needed.",
	)
	with st.spinner("Running self-healing checks…"):
	from healing.healing_loop import self_heal
	healed = self_heal(query, answer, unique, citations, llm=llm)
	if healed.attempts_used > 0:
	st.markdown("### Healed Answer")
	st.markdown(healed.answer)
	st.markdown("### Updated Citations")
	for c in healed.citations:
	st.markdown(
	f"[{c['n']}] {c['title']} — "
	f"pages {c.get('page_start')}–{c.get('page_end')} "
	f"· score `{c['score']:.3f}`"
	)
	_render_healing_trace(healed.healing_trace, healed.health_score)
	return

	if it < AGENT_CONFIG["max_iterations"] - 1:
	st.warning("Confidence below threshold — refining query and retrying.")
	current_query = refine_query(query, crit.get("missing", ""), llm=llm)
	else:
	st.error("Max iterations reached. Returning best-effort answer.")
	if enable_healing:
	phase_header(
	7, "Self-Healing layer",
	"Hallucination detection → chunk quality scoring → knowledge gap → regenerate if needed.",
	)
	with st.spinner("Running self-healing checks…"):
	from healing.healing_loop import self_heal
	healed = self_heal(query, answer, unique, citations, llm=llm)
	if healed.attempts_used > 0:
	st.markdown("### Healed Answer")
	st.markdown(healed.answer)
	_render_healing_trace(healed.healing_trace, healed.health_score)


	# ───────────────────────────── sidebar + tabs ──────────────────────────────
	def _sidebar() -> None:
	st.sidebar.title("AdaptiveRAG")
	st.sidebar.caption("Agentic + Self-RAG + Modular RAG")
	llm = _llm()
	ok = llm.health()
	backend = "Groq API" if HOSTED else "Ollama (local)"
	st.sidebar.markdown(f"LLM backend: {'🟢' if ok else '🔴'} {backend}")
	st.sidebar.markdown(f"Model: `{LLM_CONFIG['model']}`")
	st.sidebar.markdown(f"Embedder: `{EMBEDDING_CONFIG['model'].split('/')[-1]}`")
	st.sidebar.markdown(f"Reranker: `bge-reranker-base`")
	manifest = _load_manifest()
	if manifest:
	st.sidebar.markdown(f"Index: {manifest.get('n_chunks','?')} chunks across "
	f"{len(manifest.get('chunks_per_doc',{}))} docs")
	with st.sidebar.expander("Documents"):
	for doc, n in sorted(manifest.get("chunks_per_doc", {}).items()):
	st.markdown(f"- `{doc}` — {n}")
	else:
	st.sidebar.warning("No index found. Run `python ingest.py --reset`.")
	st.sidebar.divider()
	st.sidebar.markdown("### Pipeline")
	st.sidebar.code(
	"question\n ↓ embed (MiniLM)\n ↓ Self-RAG router\n ↓ planner → sub-queries\n"
	" ↓ dense ∥ sparse\n ↓ RRF fusion\n ↓ cross-encoder rerank\n ↓ LLM answer\n"
	" ↓ self-critique → retry?\n ↓ self-healing ⚕️\n → answer + citations",
	language="text",
	)


	def pipeline_tab() -> None:
	st.subheader("🔬 Underhood: watch every stage of the agentic RAG pipeline")
	st.caption(
	"Each step renders its inputs and outputs as it runs — embedding vector, "
	"router decision, planner sub-queries, dense vs sparse hits side-by-side, "
	"RRF fusion, cross-encoder rerank, vector-space projection, answer, self-critique."
	)
	samples = [
	"How does Self-RAG decide when to retrieve, and what reflection tokens does it use?",
	"Compare DDPM and DDIM sampling — what does DDIM gain by being non-Markovian?",
	"What is multi-head self-attention and why does parallelism matter?",
	"How does HyDE improve dense retrieval without relevance labels?",
	"How does ReAct combine reasoning and acting, vs chain-of-thought?",
	"hello, what can you do?",
	]
	if "vq" not in st.session_state:
	st.session_state.vq = samples[0]
	cols = st.columns(3)
	for i, s in enumerate(samples):
	if cols[i % 3].button(s, key=f"vs{i}", use_container_width=True):
	st.session_state.vq = s
	q = st.text_area("Question", value=st.session_state.vq, height=80, key="vq_input")
	enable_healing = st.toggle(
	"⚕️ Self-Healing",
	value=True,
	help="After the answer is generated, run hallucination detection, chunk quality "
	"scoring, and knowledge-gap checks — regenerating if issues are found.",
	)
	if st.button("▶ Run pipeline", type="primary"):
	if q.strip():
	visual_pipeline(q.strip(), enable_healing=enable_healing)


	def image_tab() -> None:
	st.subheader("🖼️ Multimodal RAG (Qwen3-VL)")
	st.caption(
	"Upload an image (e.g. a figure from a paper). Qwen3-VL captions it, the "
	"caption + question drives hybrid retrieval, then the model reasons over "
	"image + retrieved passages together."
	)
	uploaded = st.file_uploader("Image", type=["png", "jpg", "jpeg", "webp"])
	q = st.text_input("Question about the image", "Explain what this figure shows.")
	go = st.button("Reason", type="primary", key="img_go")
	if uploaded:
	st.image(uploaded, width=400)
	if not (go and uploaded):
	return
	with tempfile.NamedTemporaryFile(suffix=Path(uploaded.name).suffix, delete=False) as f:
	f.write(uploaded.getbuffer())
	tmp_path = f.name
	try:
	with st.spinner("Captioning → retrieving → multimodal reasoning..."):
	out = image_retrieve_and_reason(tmp_path, q, llm=_llm())
	st.markdown("### Caption")
	st.write(out["caption"])
	st.markdown("### Answer")
	st.markdown(out["answer"])
	st.markdown("### Retrieved passages")
	for i, h in enumerate(out["hits"], start=1):
	st.markdown(
	f"[{i}] {h.metadata.get('title')} "
	f"(p.{h.metadata.get('page_start')}–{h.metadata.get('page_end')}) "
	f"· score `{h.score:.3f}`"
	)
	st.caption(h.text[:300] + ("…" if len(h.text) > 300 else ""))
	finally:
	os.unlink(tmp_path)


	def kb_tab() -> None:
	"""Knowledge Base Versioning tab."""
	st.subheader("📚 Knowledge Base Versioning")
	st.caption(
	"Every ingest run creates a versioned snapshot of the index (kb_v1, kb_v2 …). "
	"Old snapshots are never deleted — rollback is a single metadata write. "
	"You can query any version independently and replay historical answers."
	)

	# ── lazy import so the tab loads even if versioning DB is empty ──
	try:
	from versioning.version_router import VersionRouter
	router = VersionRouter()
	history = router.list_versions()
	except Exception as exc:
	st.error(f"Could not initialise versioning layer: {exc}")
	return

	# ── current version banner ───────────────────────────────────────
	current = router.current_version()
	if current is None:
	st.warning(
	"No versioned snapshot found. Run `python ingest.py` to create v1."
	)
	return

	info = router.version_info(current)
	c1, c2, c3, c4 = st.columns(4)
	c1.metric("Current version", f"v{current}")
	c2.metric("Total snapshots", len(history))
	if info:
	c3.metric("Docs added (latest)", info.get("docs_added", 0))
	c4.metric("Docs changed (latest)", info.get("docs_changed", 0))

	st.divider()

	# ── version history table ────────────────────────────────────────
	st.markdown("### Version history")
	if history:
	df = pd.DataFrame(history)
	df["version"] = df["version"].apply(lambda v: f"v{v}")
	df["timestamp"] = df["timestamp"].str[:19].str.replace("T", " ")
	df = df.rename(columns={
	"version": "Version",
	"timestamp": "Created",
	"batch_name": "Batch",
	"docs_added": "Added",
	"docs_changed": "Changed",
	"docs_unchanged": "Unchanged",
	"reason": "Reason",
	"collection_name": "Collection",
	})
	st.dataframe(df, use_container_width=True, hide_index=True)
	else:
	st.info("No versions yet.")

	st.divider()

	# ── rollback ─────────────────────────────────────────────────────
	st.markdown("### Rollback")
	st.caption(
	"Rolling back points the 'latest' pointer at a previous snapshot. "
	"The current snapshot is not deleted — you can roll forward again any time."
	)
	version_nums = sorted([v["version"] for v in history])
	if len(version_nums) > 1:
	target = st.selectbox(
	"Roll back to",
	options=[v for v in version_nums if v != current],
	format_func=lambda v: f"v{v}",
	key="kb_rollback_target",
	)
	if st.button("⏪ Rollback", type="secondary"):
	try:
	router.rollback(target)
	st.success(f"Rolled back to v{target}. Reload the page to see updated metrics.")
	st.rerun()
	except Exception as exc:
	st.error(str(exc))
	else:
	st.info("Need at least 2 versions to roll back.")

	st.divider()

	# ── per-version diff view ─────────────────────────────────────────
	st.markdown("### What changed per version")
	for v in history:
	vnum = v["version"]
	added = v.get("docs_added", 0)
	changed = v.get("docs_changed", 0)
	unchanged = v.get("docs_unchanged", 0)
	ts = (v.get("timestamp") or "")[:19].replace("T", " ")
	badge = "🟢" if vnum == current else "⚪"
	label = f"{badge} v{vnum} — {ts} · +{added} added, ~{changed} changed, {unchanged} unchanged"
	with st.expander(label, expanded=(vnum == current)):
	cols = st.columns(4)
	cols[0].metric("Added", added)
	cols[1].metric("Changed", changed)
	cols[2].metric("Unchanged", unchanged)
	cols[3].metric("Reason", v.get("reason") or "—")
	st.caption(f"Collection: `{v.get('collection_name')}` · Batch: `{v.get('batch_name')}`")
	# list docs active at this version
	try:
	from versioning.document_store import DocumentStore
	store = DocumentStore()
	docs = store.docs_at_version(vnum)
	if docs:
	st.markdown("Documents active at this version:")
	for d in sorted(docs, key=lambda x: x["doc_id"]):
	status_icon = "✅" if d["status"] == "active" else "🗃️"
	chk = (d.get("checksum") or "")[:12]
	st.markdown(
	f"{status_icon} `{d['doc_id']}` — "
	f"{d.get('title','?')[:60]} "
	f"<span style='color:#9aa3b2;font-size:.75rem;'>"
	f"sha256:{chk}…</span>",
	unsafe_allow_html=True,
	)
	except Exception:
	pass

	st.divider()

	# ── cross-version query ───────────────────────────────────────────
	st.markdown("### Query a specific version")
	st.caption(
	"Run the same question against different snapshots to see how the "
	"knowledge base evolution affects retrieval."
	)
	qv_text = st.text_input(
	"Question",
	value="How does Self-RAG decide when to retrieve?",
	key="kb_query_text",
	)
	col_v, col_k = st.columns([2, 1])
	qv_version = col_v.selectbox(
	"Version to query",
	options=["latest"] + [f"v{v}" for v in sorted(version_nums, reverse=True)],
	key="kb_query_version",
	)
	qv_k = col_k.slider("Top-K", 3, 10, 5, key="kb_query_k")

	if st.button("🔍 Query version", key="kb_query_btn"):
	version_arg: str \| int = (
	"latest"
	if qv_version == "latest"
	else int(qv_version.lstrip("v"))
	)
	if not router.collection_exists(version_arg):
	st.error(
	f"ChromaDB collection for {qv_version} not found. "
	"The snapshot may exist in metadata but its collection was removed."
	)
	else:
	with st.spinner(f"Querying {qv_version}…"):
	try:
	hits, resolved = router.query(
	qv_text, version=version_arg, k=qv_k, log=True
	)
	except Exception as exc:
	st.error(str(exc))
	hits, resolved = [], None

	if hits:
	st.success(f"Retrieved {len(hits)} passages from v{resolved}.")
	for i, h in enumerate(hits, start=1):
	meta = h.metadata
	title = meta.get("title", "?")
	st.markdown(
	f"<div class='chunk-card'>"
	f"<div class='chunk-meta'>"
	f"<span class='pill pill-purple'>#{i}</span>"
	f"score <b>{h.score:.3f}</b> · {title} · "
	f"p.{meta.get('page_start')}–{meta.get('page_end')}"
	f"</div>{h.text[:300]}{'…' if len(h.text)>300 else ''}"
	f"</div>",
	unsafe_allow_html=True,
	)
	else:
	st.warning("No hits returned.")

	st.divider()

	# ── query audit log ───────────────────────────────────────────────
	st.markdown("### Query audit log")
	st.caption("Every versioned query is recorded here for replay and debugging.")
	log = router.get_query_log(limit=20)
	if log:
	df_log = pd.DataFrame(log)
	df_log["timestamp"] = df_log["timestamp"].str[:19].str.replace("T", " ")
	df_log["version_used"] = df_log["version_used"].apply(
	lambda v: f"v{v}" if v is not None else "—"
	)
	df_log = df_log.rename(columns={
	"timestamp": "Time",
	"query": "Query",
	"version_used": "Version",
	"answer_hash": "Hash",
	})
	st.dataframe(df_log, use_container_width=True, hide_index=True)
	else:
	st.info("No queries logged yet.")


	def main() -> None:
	_sidebar()
	st.title("AdaptiveRAG 📚🔬")
	st.caption(
	"Agentic + Self-RAG + Modular RAG over your local paper library — "
	f"powered by `{LLM_CONFIG['model']}` via {LLM_CONFIG['provider']}. "
	"Every pipeline stage is exposed below."
	)
	pipe, img, kb = st.tabs([
	"🔬 Underhood pipeline",
	"🖼️ Image Q&A (multimodal)",
	"📚 Knowledge Base",
	])
	with pipe:
	pipeline_tab()
	with img:
	image_tab()
	with kb:
	kb_tab()


	if __name__ == "__main__":
	main()