Spaces:

tarekmasryo
/

rag-qa-command-cente

Running

rag-qa-command-cente / src /views /trace_explorer.py

Tarek Masryo

chore: update project files

6bef416 25 days ago

4.16 kB

	from __future__ import annotations

	import pandas as pd
	import streamlit as st

	from src.analytics import RETRIEVAL_OK_THRESHOLD, top_examples
	from src.text_search import literal_text_mask
	from src.ui import callout, section_title, trace_box


	class TraceExplorerViewMixin:
	    """Example-level trace review page.

	    Mixin for a Streamlit view. The methods below read ``self.ctx``
	    (``filtered_eval`` and ``filtered_retrieval`` frames) and ``self.data``
	    (``eval_runs``, ``retrieval_events`` and ``chunks`` frames); the host
	    class has to provide those attributes.
	    """

	    def _page_trace_explorer(self) -> None:
	        """Render the page: queue picker, question search, example selector, detail.

	        Shows a warning callout and stops when the chosen queue (after the
	        optional question search) contains no examples.
	        """
	        section_title("Trace Explorer", "Inspect examples, retrieved chunks, diagnosis, and metadata.")
	        mode = st.radio(
	            "Example queue",
	            ["High risk", "Incorrect", "Hallucination", "Low retrieval"],
	            horizontal=True,
	            key="trace_mode",
	        )
	        examples = top_examples(self.ctx.filtered_eval, mode=mode, n=150, reference_df=self.data.eval_runs)
	        search = st.text_input("Search questions", "", key="trace_search")
	        if search and "query" in examples.columns:
	            examples = examples[literal_text_mask(examples["query"], search)]
	        if examples.empty:
	            callout("warn", "No examples", "Try another queue or relax filters.")
	            return

	        # Fall back to the first column when the frame has no explicit id column.
	        id_col = "example_id" if "example_id" in examples.columns else examples.columns[0]
	        # Ids are compared as strings so the selectbox value matches any dtype.
	        ids = examples[id_col].astype(str)
	        selected_id = st.selectbox("Select example", ids.tolist(), key="trace_example_id")
	        ex = examples[ids == selected_id].iloc[0]
	        self._render_trace_detail(ex, selected_id)

	    def _render_trace_detail(self, ex: pd.Series, selected_id: str) -> None:
	        """Render question/answer, metadata, diagnosis and retrieved chunks.

	        Args:
	            ex: The selected example row.
	            selected_id: String form of the example id, used to look up its
	                retrieval rows.
	        """
	        left, middle, right = st.columns([1.15, 1.0, 0.85], gap="large")
	        with left:
	            trace_box("Question", str(ex.get("query", "No query column available.")))
	            trace_box("Gold answer", str(ex.get("gold_answer", "No gold answer available.")))
	        with middle:
	            st.json(
	                {
	                    "domain": ex.get("domain"),
	                    "scenario_type": ex.get("scenario_type"),
	                    "difficulty": ex.get("difficulty"),
	                    "correct": ex.get("is_correct"),
	                    "hallucination": ex.get("hallucination_flag"),
	                    "recall_at_10": ex.get("recall_at_10"),
	                    "mrr_at_10": ex.get("mrr_at_10"),
	                }
	            )
	        with right:
	            diagnosis, kind = self._diagnose_example(ex)
	            callout(
	                kind,
	                diagnosis,
	                "Use the retrieved chunks below to validate evidence and decide the next debugging action.",
	            )
	        self._render_retrieved_chunks(selected_id)

	    @staticmethod
	    def _metric_as_float(value: object) -> float:
	        """Coerce one metric cell to ``float``, mapping missing values to ``0.0``.

	        ``None``, ``pd.NA``, ``NaN`` and other falsy values all become ``0.0``.
	        """
	        # ``pd.NA`` has no truth value, so it must be caught before ``or``.
	        if value is None or value is pd.NA:
	            return 0.0
	        number = float(value or 0)
	        # NaN is truthy and survives ``or``; it is the only float unequal to itself.
	        return 0.0 if number != number else number

	    @staticmethod
	    def _diagnose_example(ex: pd.Series) -> tuple[str, str]:
	        """Classify an example row into a ``(diagnosis, callout_kind)`` pair.

	        Reads ``is_correct``, ``hallucination_flag`` and ``recall_at_10`` from
	        ``ex``; absent or missing values count as 0. The hallucination check
	        takes precedence, then incorrect answers are split on whether recall
	        is below ``RETRIEVAL_OK_THRESHOLD``.

	        Returns:
	            The diagnosis title and one of ``"bad"``, ``"warn"`` or ``"good"``.
	        """
	        to_float = TraceExplorerViewMixin._metric_as_float
	        correct = to_float(ex.get("is_correct", 0))
	        hallucination = to_float(ex.get("hallucination_flag", 0))
	        recall = to_float(ex.get("recall_at_10", 0))
	        if hallucination >= 0.5:
	            return "Hallucination review needed", "bad"
	        if recall < RETRIEVAL_OK_THRESHOLD and correct < 0.5:
	            return "Likely retrieval failure", "bad"
	        if recall >= RETRIEVAL_OK_THRESHOLD and correct < 0.5:
	            return "Likely generation failure", "warn"
	        return "Healthy or recovered case", "good"

	    def _render_retrieved_chunks(self, selected_id: str) -> None:
	        """Show up to 12 retrieval rows for ``selected_id``, enriched with chunk data.

	        Shows an info callout instead of a table when the retrieval frames lack
	        ``example_id``/``chunk_id`` or no row matches under the current filters.
	        """
	        required = {"example_id", "chunk_id"}
	        # Validate the frame that is actually indexed below as well as the
	        # packaged table; ``or`` short-circuits, so the filtered frame is only
	        # touched once the packaged table has passed.
	        if not required.issubset(self.data.retrieval_events.columns) or not required.issubset(
	            self.ctx.filtered_retrieval.columns
	        ):
	            callout("info", "No retrieval table", "Retrieval events are unavailable in the packaged data.")
	            return
	        filtered = self.ctx.filtered_retrieval
	        ret = filtered[filtered["example_id"].astype(str) == str(selected_id)]
	        if ret.empty:
	            callout("info", "No retrieval rows", "This example has no matching retrieval event rows under the current filters.")
	            return

	        chunks = self.data.chunks
	        if "chunk_id" in chunks.columns:
	            # Only bring in columns the retrieval rows do not already have, so
	            # the merge cannot rename them to ``*_x`` / ``*_y``.
	            extra_cols = [c for c in ("doc_id", "chunk_text") if c in chunks.columns and c not in ret.columns]
	            if extra_cols:
	                # One lookup row per chunk keeps the left join from multiplying rows.
	                chunk_lookup = chunks[["chunk_id", *extra_cols]].drop_duplicates(subset="chunk_id")
	                ret = ret.merge(chunk_lookup, on="chunk_id", how="left")

	        show_cols = [
	            c
	            for c in ["rank", "chunk_id", "retrieval_score", "is_relevant", "doc_id", "chunk_text"]
	            if c in ret.columns
	        ]
	        table = ret[show_cols]
	        # ``rank`` is optional in ``show_cols``, so sorting has to be optional too.
	        if "rank" in table.columns:
	            table = table.sort_values("rank")
	        st.dataframe(table.head(12), use_container_width=True, hide_index=True)