Tarek Masryo
chore: update project files
6bef416
from __future__ import annotations
import textwrap
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Default trace colour cycle; handed to the layout as ``colorway`` by apply_theme.
COLORWAY = [
    "#22d3ee",
    "#8b5cf6",
    "#f59e0b",
    "#22c55e",
    "#ef4444",
    "#38bdf8",
    "#a78bfa",
]

# Display labels keyed by failure-mode identifier. Identifiers that are not
# listed here are prettified generically by readable_failure_mode.
FAILURE_MODE_LABELS = {
    "healthy": "Healthy",
    "generation_failure": "Generation failure",
    "retrieval_failure": "Retrieval failure",
    "hallucination_failure": "Hallucination failure",
    "hallucination_risk_correct_answer": "Hallucination risk",
    "recovered_by_generation": "Recovered by generation",
    # Placeholder value used when the failure mode was not supplied.
    "Missing / Not provided": "Missing",
}
def readable_failure_mode(value: object) -> str:
    """Return a display label for a failure-mode value.

    The value is stringified first. Identifiers listed in
    ``FAILURE_MODE_LABELS`` use their configured label; any other value has
    its underscores replaced by spaces and is title-cased.
    """
    key = str(value)
    if key in FAILURE_MODE_LABELS:
        return FAILURE_MODE_LABELS[key]
    # Unknown identifier: fall back to a generic prettified form.
    return key.replace("_", " ").title()
def _wrap_title(title: str, width: int = 54) -> str:
if not title:
return ""
return "<br>".join(textwrap.wrap(str(title), width=width))
def apply_theme(fig: go.Figure, height: int = 420, *, title: str | None = None) -> go.Figure:
    """Restyle ``fig`` in place with the shared dark layout and return it.

    The title is pinned to the top-left, the legend is laid out horizontally
    below the plotting area, and the top/bottom margins are widened to leave
    room for both so they do not collide inside narrow containers.

    Args:
        fig: Figure to restyle; it is mutated and also returned.
        height: Figure height passed to the layout.
        title: Title text to use. ``None`` keeps the figure's current title;
            an empty string clears it.

    Returns:
        The same ``fig`` object, for call chaining.
    """
    # An explicit title (including "") takes precedence over the existing one.
    resolved_title = fig.layout.title.text if title is None else title

    title_style = {
        "text": _wrap_title(resolved_title or ""),
        "x": 0.0,
        "xanchor": "left",
        "y": 0.98,
        "yanchor": "top",
        "font": {"size": 15, "color": "#f8fafc"},
    }
    # Horizontal legend anchored by its top edge at a negative y, i.e. under the plot.
    legend_style = {
        "orientation": "h",
        "yanchor": "top",
        "y": -0.18,
        "xanchor": "left",
        "x": 0.0,
        "font": {"size": 11},
        "bgcolor": "rgba(0,0,0,0)",
        # Single click isolates a trace; double click toggles it.
        "itemclick": "toggleothers",
        "itemdoubleclick": "toggle",
    }

    fig.update_layout(
        template="plotly_dark",
        height=height,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(5,8,22,0.35)",
        font={"color": "#dbeafe", "family": "Inter, Segoe UI, sans-serif", "size": 12},
        colorway=COLORWAY,
        # Extra top/bottom margin reserves space for the title and the legend.
        margin={"l": 28, "r": 28, "t": 84, "b": 92},
        title=title_style,
        legend=legend_style,
        hoverlabel={"bgcolor": "#0f172a", "font_size": 12},
        uniformtext_minsize=10,
        uniformtext_mode="hide",
    )

    # Both axes share the same faint grid and zero-line colours.
    axis_style = {
        "gridcolor": "rgba(148,163,184,0.14)",
        "zerolinecolor": "rgba(148,163,184,0.18)",
    }
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)
    return fig
def empty_chart(title: str = "No data available") -> go.Figure:
    """Build a themed placeholder figure that shows ``title`` as a message.

    The text is rendered as an annotation rather than as the layout title
    (which is cleared), and both axes are hidden.
    """
    placeholder = go.Figure()
    message_font = dict(size=16, color="#94a3b8")
    placeholder.add_annotation(text=title, x=0.5, y=0.5, showarrow=False, font=message_font)
    # No data to plot, so neither axis should be drawn.
    placeholder.update_xaxes(visible=False)
    placeholder.update_yaxes(visible=False)
    return apply_theme(placeholder, 320, title="")
def heatmap(df: pd.DataFrame, x: str, y: str, z: str, title: str, height: int = 430) -> go.Figure:
    """Render the mean of ``z`` per (``y``, ``x``) cell as an annotated heatmap.

    Returns the placeholder chart when ``df`` is empty or lacks any of the
    three columns. The colour scale direction depends on the metric name:
    metrics containing "correct" or "recall" run red -> amber -> green, all
    others run green -> amber -> red.
    """
    if df.empty:
        return empty_chart()
    if not {x, y, z}.issubset(df.columns):
        return empty_chart()

    grid = df.pivot_table(index=y, columns=x, values=z, aggfunc="mean")

    low_to_high = ["#ef4444", "#f59e0b", "#22c55e"]
    higher_is_better = "correct" in z or "recall" in z
    # For every other metric the same three colours are used in reverse order.
    scale = low_to_high if higher_is_better else low_to_high[::-1]

    fig = px.imshow(
        grid,
        color_continuous_scale=scale,
        aspect="auto",
        title=title,
        text_auto=".2f",
    )
    return apply_theme(fig, height)
def ranked_bar(df: pd.DataFrame, x: str, y: str, title: str, color: str | None = None, height: int = 430) -> go.Figure:
    """Draw a horizontal bar chart from the first 16 rows of ``df``.

    Rows are used in the order given; no sorting happens here. ``color`` is
    applied only when it names an existing column. Returns the placeholder
    chart when ``df`` is empty or lacks ``x`` or ``y``.
    """
    if df.empty or not {x, y}.issubset(df.columns):
        return empty_chart()

    top_rows = df.head(16).copy()
    color_by = color if color in top_rows.columns else None
    fig = px.bar(
        top_rows,
        x=x,
        y=y,
        color=color_by,
        orientation="h",
        title=title,
    )
    # Flip the y axis so the first row is drawn at the top.
    fig.update_yaxes(autorange="reversed")
    return apply_theme(fig, height)
def bubble_quality(df: pd.DataFrame, title: str = "Quality risk map") -> go.Figure:
    """Scatter ``correct_rate`` against ``hallucination_rate`` with bubbles sized by ``n``.

    Bubbles are coloured by ``domain`` when that column exists. Dotted
    reference lines mark the median of each axis. Returns the placeholder
    chart when ``df`` is empty or a required column is missing.
    """
    required = {"correct_rate", "hallucination_rate", "n"}
    if df.empty or not required.issubset(df.columns):
        return empty_chart()

    # Optional columns are used only when the frame actually has them.
    optional_hover = ["domain", "scenario_type", "difficulty", "n", "recall_at_10", "risk_score"]
    hover_cols = [name for name in optional_hover if name in df.columns]
    color_col = "domain" if "domain" in df.columns else None

    fig = px.scatter(
        df,
        x="hallucination_rate",
        y="correct_rate",
        size="n",
        color=color_col,
        hover_data=hover_cols,
        title=title,
        size_max=34,
    )

    # Median guides split the plot into four quadrants.
    median_hallucination = df["hallucination_rate"].median()
    median_correct = df["correct_rate"].median()
    fig.add_vline(x=median_hallucination, line_dash="dot", line_color="rgba(245,158,11,.8)")
    fig.add_hline(y=median_correct, line_dash="dot", line_color="rgba(34,211,238,.8)")

    # Both axes are formatted as whole-number percentages.
    fig.update_xaxes(tickformat=".0%")
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 500)
def cost_quality_scatter(df: pd.DataFrame, objective: str) -> go.Figure:
    """Scatter ``correct_rate`` against ``avg_cost_usd`` for each row of ``df``.

    Marker size follows ``p95_latency_ms`` and marker colour follows ``score``.
    ``objective`` only appears in the chart title. Returns the placeholder
    chart when ``df`` is empty or a required column is missing.
    """
    required = {"correct_rate", "avg_cost_usd", "p95_latency_ms", "score"}
    if df.empty or not required.issubset(df.columns):
        return empty_chart()

    # Optional columns are used only when the frame actually has them.
    hover_name = "config" if "config" in df.columns else None
    optional_hover = ["n", "hallucination_rate", "recall_at_10", "mrr_at_10"]
    hover_cols = [name for name in optional_hover if name in df.columns]

    fig = px.scatter(
        df,
        x="avg_cost_usd",
        y="correct_rate",
        size="p95_latency_ms",
        color="score",
        hover_name=hover_name,
        hover_data=hover_cols,
        color_continuous_scale="Turbo",
        title=f"Config frontier · {objective}",
        size_max=40,
    )
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 500)
def retrieval_mode_donut(df: pd.DataFrame) -> go.Figure:
    """Draw a donut chart of ``n`` per failure mode, using display labels.

    The input frame is not modified; labels are added to a copy. Returns the
    placeholder chart when ``df`` is empty or lacks ``failure_mode`` or ``n``.
    """
    required = {"failure_mode", "n"}
    if df.empty or not required.issubset(df.columns):
        return empty_chart()

    # assign() returns a new frame, leaving the caller's data untouched.
    labelled = df.assign(failure_mode_label=df["failure_mode"].map(readable_failure_mode))

    fig = px.pie(
        labelled,
        names="failure_mode_label",
        values="n",
        hole=0.60,
        title="Outcome mix",
    )
    slice_outline = dict(line=dict(color="rgba(15,23,42,0.9)", width=1))
    fig.update_traces(
        textposition="inside",
        textinfo="percent",
        hovertemplate="%{label}<br>Rows: %{value}<br>Share: %{percent}<extra></extra>",
        marker=slice_outline,
    )
    return apply_theme(fig, 440)
def policy_curve_chart(df: pd.DataFrame) -> go.Figure:
    """Plot the available policy rate columns against ``threshold``.

    One line-and-marker trace is added per rate column that is present in
    ``df``; missing ones are skipped. Returns the placeholder chart when
    ``df`` is empty or has no ``threshold`` column.
    """
    if df.empty or "threshold" not in df.columns:
        return empty_chart()

    # (column, legend label) pairs, in the order the traces are added.
    curves = (
        ("auto_approve_rate", "Auto approve"),
        ("auto_correct_rate", "Auto correct"),
        ("risk_captured_in_review", "Risk captured"),
    )

    fig = go.Figure()
    for column, label in curves:
        if column not in df.columns:
            continue
        fig.add_trace(
            go.Scatter(x=df["threshold"], y=df[column], name=label, mode="lines+markers")
        )

    fig.update_layout(title="Policy curve")
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 480)
def demand_chart(df: pd.DataFrame) -> go.Figure:
    """Compare eval demand share with corpus document share per domain.

    Uses the first 12 rows of ``df`` and draws the two shares as grouped
    horizontal bars. Returns the placeholder chart when ``df`` is empty or a
    required column is missing.
    """
    required = {"domain", "eval_demand_share", "corpus_document_share"}
    if df.empty or not required.issubset(df.columns):
        return empty_chart()

    # Reshape to long form: one row per (domain, signal) pair.
    long_df = df.head(12).melt(
        id_vars="domain",
        value_vars=["eval_demand_share", "corpus_document_share"],
        var_name="signal",
        value_name="share",
    )
    legend_names = {"eval_demand_share": "Eval demand", "corpus_document_share": "Corpus docs"}
    long_df["signal"] = long_df["signal"].replace(legend_names)

    fig = px.bar(
        long_df,
        x="share",
        y="domain",
        color="signal",
        barmode="group",
        orientation="h",
        title="Coverage vs demand",
    )
    fig.update_xaxes(tickformat=".0%")
    # Flip the y axis so the first domain is drawn at the top.
    fig.update_yaxes(autorange="reversed")
    return apply_theme(fig, 480)
def treemap_risk(df: pd.DataFrame) -> go.Figure:
    """Draw a treemap of the first 80 rows, sized by ``n`` and coloured by ``risk_score``.

    The hierarchy is built from whichever of ``domain``, ``scenario_type`` and
    ``difficulty`` exist in ``df``, in that order.

    Returns:
        The themed treemap, or the placeholder chart when ``df`` is empty,
        lacks ``risk_score`` or ``n``, or has none of the hierarchy columns.
    """
    # ``n`` sizes the tiles and ``risk_score`` colours them; without either
    # column px.treemap would raise, so fall back to the placeholder instead.
    required = {"risk_score", "n"}
    if df.empty or not required.issubset(df.columns):
        return empty_chart()

    path = [c for c in ["domain", "scenario_type", "difficulty"] if c in df.columns]
    if not path:
        return empty_chart()

    fig = px.treemap(
        df.head(80),
        path=path,
        values="n",
        color="risk_score",
        color_continuous_scale="Reds",
        title="Risk surface",
    )
    return apply_theme(fig, 540)
def rank_distribution(retrieval_df: pd.DataFrame) -> go.Figure:
    """Plot the mean of ``is_relevant`` for each retrieval rank as a bar chart.

    ``rank`` is coerced to numeric on a copy of the input, so unparseable
    values become NaN. The per-rank row count is exposed as hover data.
    Returns the placeholder chart when the frame is empty or lacks ``rank``
    or ``is_relevant``.
    """
    required = {"rank", "is_relevant"}
    if retrieval_df.empty or not required.issubset(retrieval_df.columns):
        return empty_chart()

    # assign() returns a new frame, so the caller's data is left untouched.
    numeric_rank = pd.to_numeric(retrieval_df["rank"], errors="coerce")
    ranked = retrieval_df.assign(rank=numeric_rank)

    per_rank = (
        ranked.groupby("rank")
        .agg(relevant_rate=("is_relevant", "mean"), n=("is_relevant", "size"))
        .reset_index()
    )

    fig = px.bar(
        per_rank,
        x="rank",
        y="relevant_rate",
        hover_data=["n"],
        title="Relevant rate by rank",
    )
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 440)