Spaces:

tarekmasryo
/

rag-qa-command-cente

Running

rag-qa-command-cente / src /views /quality_map.py

Tarek Masryo

chore: update project files

6bef416 19 days ago

2.13 kB

	from __future__ import annotations

	import numpy as np
	import pandas as pd
	import streamlit as st

	from src.charts import bubble_quality, heatmap
	from src.ui import callout, section_title


	class QualityMapViewMixin:
	    """Quality maps across domain, difficulty, and risk slices.

	    Mixin for a page-rendering class. The host class is expected to provide
	    ``self.ctx`` (read here for ``filtered_eval``, ``risk_slices`` and
	    ``demand_coverage``) and ``self._plot``, which is called with the value
	    returned by a chart helper and a string identifier.
	    """

	    def _page_quality_map(self) -> None:
	        """Render the "Quality Map" page.

	        Shows two side-by-side heatmaps (correctness and hallucination rate
	        by domain and difficulty), a risk-slice bubble chart below them, and,
	        when ``self.ctx.demand_coverage`` is non-empty, an informational
	        callout explaining how to read demand-minus-corpus values.
	        """
	        section_title("Quality Map", "Where quality is strong, unstable, or worth deeper review.")
	        domain_diff = self._domain_difficulty_table(self.ctx.filtered_eval)

	        left, right = st.columns(2, gap="large")
	        with left:
	            self._plot(
	                heatmap(domain_diff, "difficulty", "domain", "correct_rate", "Correctness · domain × difficulty"),
	                "quality_correctness",
	            )
	        with right:
	            self._plot(
	                heatmap(domain_diff, "difficulty", "domain", "hallucination_rate", "Hallucination · domain × difficulty"),
	                "quality_hallucination",
	            )

	        self._plot(bubble_quality(self.ctx.risk_slices, "Risk slice map"), "quality_risk_bubble")

	        if not self.ctx.demand_coverage.empty:
	            callout(
	                "info",
	                "Coverage interpretation",
	                "Positive demand-minus-corpus means the domain receives more evaluation demand than its corpus document share.",
	            )

	    @staticmethod
	    def _domain_difficulty_table(df: pd.DataFrame) -> pd.DataFrame:
	        """Aggregate evaluation rows per (domain, difficulty) pair.

	        Args:
	            df: Evaluation rows. Must contain ``domain`` and ``difficulty``
	                to produce any groups; ``is_correct``, ``hallucination_flag``
	                and ``recall_at_10`` are optional.

	        Returns:
	            One row per (domain, difficulty) pair with the columns ``domain``,
	            ``difficulty``, ``n`` (row count), ``correct_rate`` (mean of
	            ``is_correct``), ``hallucination_rate`` (mean of
	            ``hallucination_flag``) and ``recall_at_10`` (mean of
	            ``recall_at_10``). A metric whose source column is absent is
	            filled with NaN. When ``df`` is empty or lacks a grouping column,
	            the result is an empty frame that still carries all six columns,
	            so callers can reference them by name on either path.
	        """
	        group_keys = ["domain", "difficulty"]
	        # Output metric name -> source column whose mean produces it.
	        metric_sources = {
	            "correct_rate": "is_correct",
	            "hallucination_rate": "hallucination_flag",
	            "recall_at_10": "recall_at_10",
	        }

	        if df.empty or not set(group_keys).issubset(df.columns):
	            # Keep the schema identical to the populated path instead of
	            # returning a column-less frame.
	            return pd.DataFrame(columns=[*group_keys, "n", *metric_sources])

	        aggs: dict[str, tuple[str, str]] = {"n": ("domain", "size")}
	        for metric, source in metric_sources.items():
	            if source in df.columns:
	                aggs[metric] = (source, "mean")

	        # dropna=False keeps rows with a missing domain/difficulty as their own group.
	        out = df.groupby(group_keys, dropna=False).agg(**aggs).reset_index()

	        # Guarantee every metric column exists even when its source was absent.
	        for metric in metric_sources:
	            if metric not in out.columns:
	                out[metric] = np.nan
	        return out