# rag-qa-command-cente / src /views /quality_map.py
# Tarek Masryo
# chore: update project files
# 6bef416
from __future__ import annotations
import numpy as np
import pandas as pd
import streamlit as st
from src.charts import bubble_quality, heatmap
from src.ui import callout, section_title
class QualityMapViewMixin:
    """Quality maps across domain, difficulty, and risk slices.

    This mixin does not define its own state. It relies on the host class to
    provide ``self.ctx`` (read here for ``filtered_eval``, ``risk_slices`` and
    ``demand_coverage``) and a ``self._plot(figure, key)`` method.
    """

    def _page_quality_map(self) -> None:
        """Render the Quality Map page.

        Layout: a section title, two side-by-side heatmaps (correctness and
        hallucination rate per domain/difficulty cell), a risk-slice bubble
        chart, and an informational callout that is shown only when
        ``self.ctx.demand_coverage`` is non-empty.
        """
        section_title("Quality Map", "Where quality is strong, unstable, or worth deeper review.")
        domain_diff = self._domain_difficulty_table(self.ctx.filtered_eval)

        # Titles use explicit escapes for the middle dot (U+00B7) and the
        # multiplication sign (U+00D7) so they cannot be mangled by a
        # mis-decoded source file.
        correctness_title = "Correctness \u00b7 domain \u00d7 difficulty"
        hallucination_title = "Hallucination \u00b7 domain \u00d7 difficulty"

        left, right = st.columns(2, gap="large")
        with left:
            self._plot(
                heatmap(domain_diff, "difficulty", "domain", "correct_rate", correctness_title),
                "quality_correctness",
            )
        with right:
            self._plot(
                heatmap(domain_diff, "difficulty", "domain", "hallucination_rate", hallucination_title),
                "quality_hallucination",
            )

        # NOTE(review): the original indentation was lost; the bubble chart and
        # callout are assumed to sit below the two columns, not inside them.
        self._plot(bubble_quality(self.ctx.risk_slices, "Risk slice map"), "quality_risk_bubble")

        if not self.ctx.demand_coverage.empty:
            callout(
                "info",
                "Coverage interpretation",
                "Positive demand-minus-corpus means the domain receives more evaluation demand than its corpus document share.",
            )

    @staticmethod
    def _domain_difficulty_table(df: pd.DataFrame) -> pd.DataFrame:
        """Aggregate evaluation rows into one row per (domain, difficulty).

        Args:
            df: Frame that should contain ``domain`` and ``difficulty``
                columns. ``is_correct``, ``hallucination_flag`` and
                ``recall_at_10`` are optional and are averaged when present.

        Returns:
            A frame with ``domain``, ``difficulty``, ``n`` (group size) and the
            metric columns ``correct_rate``, ``hallucination_rate`` and
            ``recall_at_10``. A metric whose source column is missing is
            filled with NaN. If ``df`` is empty or lacks a grouping column,
            an empty frame with those same columns is returned, so callers
            see one schema in every case.
        """
        # Output metric -> source column; dict order fixes the column order.
        metric_sources = {
            "correct_rate": "is_correct",
            "hallucination_rate": "hallucination_flag",
            "recall_at_10": "recall_at_10",
        }
        group_keys = ["domain", "difficulty"]

        if df.empty or not set(group_keys).issubset(df.columns):
            return pd.DataFrame(columns=[*group_keys, "n", *metric_sources])

        aggs: dict[str, tuple[str, str]] = {"n": ("domain", "size")}
        aggs.update(
            {metric: (source, "mean") for metric, source in metric_sources.items() if source in df.columns}
        )

        # dropna=False keeps rows whose domain/difficulty is missing as their
        # own group instead of silently discarding them.
        out = df.groupby(group_keys, dropna=False).agg(**aggs).reset_index()

        # Guarantee every metric column exists even when its source is absent.
        for metric in metric_sources:
            if metric not in out.columns:
                out[metric] = np.nan
        return out