| from __future__ import annotations |
|
|
| import textwrap |
|
|
| import pandas as pd |
| import plotly.express as px |
| import plotly.graph_objects as go |
|
|
# Ordered hex colours handed to Plotly as the layout ``colorway``.
COLORWAY = [
    "#22d3ee",
    "#8b5cf6",
    "#f59e0b",
    "#22c55e",
    "#ef4444",
    "#38bdf8",
    "#a78bfa",
]
# Display labels for the known failure-mode identifiers. Identifiers missing
# from this table fall back to a title-cased form of the identifier itself.
FAILURE_MODE_LABELS = {
    "healthy": "Healthy",
    "generation_failure": "Generation failure",
    "retrieval_failure": "Retrieval failure",
    "hallucination_failure": "Hallucination failure",
    "hallucination_risk_correct_answer": "Hallucination risk",
    "recovered_by_generation": "Recovered by generation",
    "Missing / Not provided": "Missing",
}


def readable_failure_mode(value: object) -> str:
    """Convert an internal failure-mode identifier into a UI-safe label.

    Args:
        value: Failure-mode identifier. Non-string values are stringified.

    Returns:
        The label from ``FAILURE_MODE_LABELS`` when the identifier is known.
        Null-like scalars (``None``, NaN, ``pd.NA``, ``pd.NaT``) return the
        same label as ``"Missing / Not provided"``. Anything else is returned
        with underscores replaced by spaces and title-cased.
    """
    # Without this guard a null would be stringified and title-cased into a
    # label such as "None", "Nan" or "<Na>". The is_scalar check keeps
    # pd.isna from returning an array for list-like inputs.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return FAILURE_MODE_LABELS["Missing / Not provided"]
    text = str(value)
    return FAILURE_MODE_LABELS.get(text, text.replace("_", " ").title())
|
|
|
|
| def _wrap_title(title: str, width: int = 54) -> str: |
| if not title: |
| return "" |
| return "<br>".join(textwrap.wrap(str(title), width=width)) |
|
|
|
|
def apply_theme(fig: go.Figure, height: int = 420, *, title: str | None = None) -> go.Figure:
    """Restyle ``fig`` in place with the shared dark theme and return it.

    Chart titles, legends, and Plotly modebar controls can overlap inside
    narrow Streamlit columns. To avoid that, the title is pinned to the top
    left, the legend is laid out horizontally below the plotting area, and
    the top and bottom margins are sized to hold both.

    Args:
        fig: Figure to restyle; it is modified in place.
        height: Value assigned to the layout ``height``.
        title: Title override. ``None`` keeps the title already set on
            ``fig``; an empty string leaves the chart untitled.

    Returns:
        The same ``fig`` object that was passed in.
    """
    if title is None:
        resolved_title = fig.layout.title.text
    else:
        resolved_title = title

    base_font = {"color": "#dbeafe", "family": "Inter, Segoe UI, sans-serif", "size": 12}
    title_block = {
        "text": _wrap_title(resolved_title or ""),
        "x": 0.0,
        "xanchor": "left",
        "y": 0.98,
        "yanchor": "top",
        "font": {"size": 15, "color": "#f8fafc"},
    }
    # The negative y anchors the legend's top edge beneath the plot area.
    legend_block = {
        "orientation": "h",
        "yanchor": "top",
        "y": -0.18,
        "xanchor": "left",
        "x": 0.0,
        "font": {"size": 11},
        "bgcolor": "rgba(0,0,0,0)",
        "itemclick": "toggleothers",
        "itemdoubleclick": "toggle",
    }

    fig.update_layout(
        template="plotly_dark",
        height=height,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(5,8,22,0.35)",
        font=base_font,
        colorway=COLORWAY,
        margin={"l": 28, "r": 28, "t": 84, "b": 92},
        title=title_block,
        legend=legend_block,
        hoverlabel={"bgcolor": "#0f172a", "font_size": 12},
        uniformtext_minsize=10,
        uniformtext_mode="hide",
    )

    # Both axes share the same faint grid and zero-line colours.
    axis_lines = {
        "gridcolor": "rgba(148,163,184,0.14)",
        "zerolinecolor": "rgba(148,163,184,0.18)",
    }
    fig.update_xaxes(**axis_lines)
    fig.update_yaxes(**axis_lines)
    return fig
|
|
|
|
def empty_chart(title: str = "No data available") -> go.Figure:
    """Build a themed placeholder figure that shows ``title`` as a message.

    The message is an arrowless annotation placed at (0.5, 0.5). Both axes
    are hidden and the layout title is set to an empty string, so the
    annotation is the only text added here.
    """
    placeholder = go.Figure()
    message_font = {"size": 16, "color": "#94a3b8"}
    placeholder.add_annotation(text=title, x=0.5, y=0.5, showarrow=False, font=message_font)
    placeholder.update_xaxes(visible=False)
    placeholder.update_yaxes(visible=False)
    return apply_theme(placeholder, 320, title="")
|
|
|
|
def heatmap(df: pd.DataFrame, x: str, y: str, z: str, title: str, height: int = 430) -> go.Figure:
    """Plot the mean of ``z`` for each (``y``, ``x``) pair as an annotated heatmap.

    Args:
        df: Source rows.
        x: Column spread across the heatmap's columns.
        y: Column spread down the heatmap's rows.
        z: Numeric column averaged within each cell.
        title: Chart title.
        height: Height passed to ``apply_theme``.

    Returns:
        The themed heatmap, or the placeholder chart when ``df`` is empty or
        is missing any of the three columns.
    """
    if df.empty:
        return empty_chart()
    if not {x, y, z}.issubset(df.columns):
        return empty_chart()

    cell_means = df.pivot_table(index=y, columns=x, values=z, aggfunc="mean")

    # Metrics named like "correct"/"recall" run red -> green (low to high);
    # every other metric uses the reverse ordering.
    if "correct" in z or "recall" in z:
        scale = ["#ef4444", "#f59e0b", "#22c55e"]
    else:
        scale = ["#22c55e", "#f59e0b", "#ef4444"]

    fig = px.imshow(
        cell_means,
        color_continuous_scale=scale,
        aspect="auto",
        title=title,
        text_auto=".2f",
    )
    return apply_theme(fig, height)
|
|
|
|
def ranked_bar(df: pd.DataFrame, x: str, y: str, title: str, color: str | None = None, height: int = 430) -> go.Figure:
    """Draw a horizontal bar chart from the first 16 rows of ``df``.

    Rows are used in the order given (presumably pre-sorted by the caller),
    and the y axis is reversed. ``color`` is applied only when it names an
    existing column. Returns the placeholder chart when ``df`` is empty or
    lacks ``x`` or ``y``.
    """
    if df.empty:
        return empty_chart()
    if not {x, y}.issubset(df.columns):
        return empty_chart()

    top_rows = df.head(16).copy()
    color_column = color if color in top_rows.columns else None
    fig = px.bar(
        top_rows,
        x=x,
        y=y,
        color=color_column,
        orientation="h",
        title=title,
    )
    fig.update_yaxes(autorange="reversed")
    return apply_theme(fig, height)
|
|
|
|
def bubble_quality(df: pd.DataFrame, title: str = "Quality risk map") -> go.Figure:
    """Scatter hallucination rate (x) against correct rate (y), sized by ``n``.

    Dotted guide lines mark the median of each rate, and both axes are
    formatted as percentages. Points are coloured by ``domain`` when that
    column exists. Returns the placeholder chart when ``df`` is empty or
    lacks ``correct_rate``, ``hallucination_rate`` or ``n``.
    """
    required = {"correct_rate", "hallucination_rate", "n"}
    if df.empty or not required <= set(df.columns):
        return empty_chart()

    available = df.columns
    color_by = None
    if "domain" in available:
        color_by = "domain"
    optional_hover = ("domain", "scenario_type", "difficulty", "n", "recall_at_10", "risk_score")
    hover_fields = [name for name in optional_hover if name in available]

    fig = px.scatter(
        df,
        x="hallucination_rate",
        y="correct_rate",
        size="n",
        color=color_by,
        hover_data=hover_fields,
        title=title,
        size_max=34,
    )

    # The median guides split the plot into four quadrants.
    hallucination_median = df["hallucination_rate"].median()
    fig.add_vline(x=hallucination_median, line_dash="dot", line_color="rgba(245,158,11,.8)")
    correct_median = df["correct_rate"].median()
    fig.add_hline(y=correct_median, line_dash="dot", line_color="rgba(34,211,238,.8)")

    fig.update_xaxes(tickformat=".0%")
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 500)
|
|
|
|
def cost_quality_scatter(df: pd.DataFrame, objective: str) -> go.Figure:
    """Scatter average cost (x) against correct rate (y) for each row of ``df``.

    Marker size follows ``p95_latency_ms`` and marker colour follows
    ``score`` on the Turbo scale. ``objective`` appears only in the chart
    title. Returns the placeholder chart when ``df`` is empty or lacks
    ``correct_rate``, ``avg_cost_usd``, ``p95_latency_ms`` or ``score``.
    """
    required = {"correct_rate", "avg_cost_usd", "p95_latency_ms", "score"}
    if df.empty or not required <= set(df.columns):
        return empty_chart()

    available = df.columns
    label_column = None
    if "config" in available:
        label_column = "config"
    optional_hover = ("n", "hallucination_rate", "recall_at_10", "mrr_at_10")
    hover_fields = [name for name in optional_hover if name in available]

    fig = px.scatter(
        df,
        x="avg_cost_usd",
        y="correct_rate",
        size="p95_latency_ms",
        color="score",
        hover_name=label_column,
        hover_data=hover_fields,
        color_continuous_scale="Turbo",
        title=f"Config frontier · {objective}",
        size_max=40,
    )
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 500)
|
|
|
|
def retrieval_mode_donut(df: pd.DataFrame) -> go.Figure:
    """Draw a donut chart of row counts (``n``) per failure mode.

    Failure-mode identifiers are converted to display labels with
    ``readable_failure_mode`` on a copy, so ``df`` itself is not modified.
    Returns the placeholder chart when ``df`` is empty or lacks
    ``failure_mode`` or ``n``.
    """
    if df.empty:
        return empty_chart()
    if not {"failure_mode", "n"}.issubset(df.columns):
        return empty_chart()

    labelled = df.assign(failure_mode_label=df["failure_mode"].map(readable_failure_mode))
    fig = px.pie(
        labelled,
        names="failure_mode_label",
        values="n",
        hole=0.60,
        title="Outcome mix",
    )

    # Only percentages are drawn on the slices; the hover shows label, count and share.
    slice_outline = {"line": {"color": "rgba(15,23,42,0.9)", "width": 1}}
    fig.update_traces(
        textposition="inside",
        textinfo="percent",
        hovertemplate="%{label}<br>Rows: %{value}<br>Share: %{percent}<extra></extra>",
        marker=slice_outline,
    )
    return apply_theme(fig, 440)
|
|
|
|
def policy_curve_chart(df: pd.DataFrame) -> go.Figure:
    """Plot the available policy rate columns against ``threshold``.

    One line-and-marker trace is added for each of ``auto_approve_rate``,
    ``auto_correct_rate`` and ``risk_captured_in_review`` that exists in
    ``df``. Returns the placeholder chart when ``df`` is empty or has no
    ``threshold`` column.
    """
    if df.empty or "threshold" not in df.columns:
        return empty_chart()

    # (column, legend label) pairs, in the order their traces are added.
    curves = (
        ("auto_approve_rate", "Auto approve"),
        ("auto_correct_rate", "Auto correct"),
        ("risk_captured_in_review", "Risk captured"),
    )

    fig = go.Figure()
    thresholds = df["threshold"]
    for column, label in curves:
        if column not in df.columns:
            continue
        fig.add_trace(go.Scatter(x=thresholds, y=df[column], name=label, mode="lines+markers"))

    fig.update_layout(title="Policy curve")
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 480)
|
|
|
|
def demand_chart(df: pd.DataFrame) -> go.Figure:
    """Compare eval demand share with corpus document share per domain.

    The first 12 rows of ``df`` are reshaped to long form and drawn as
    grouped horizontal bars, one bar per signal for each domain. Returns
    the placeholder chart when ``df`` is empty or lacks ``domain``,
    ``eval_demand_share`` or ``corpus_document_share``.
    """
    share_columns = ["eval_demand_share", "corpus_document_share"]
    if df.empty:
        return empty_chart()
    if not {"domain", *share_columns}.issubset(df.columns):
        return empty_chart()

    long_df = df.head(12).melt(
        id_vars="domain",
        value_vars=share_columns,
        var_name="signal",
        value_name="share",
    )
    # Replace the raw column names with the legend labels.
    legend_names = {"eval_demand_share": "Eval demand", "corpus_document_share": "Corpus docs"}
    long_df["signal"] = long_df["signal"].replace(legend_names)

    fig = px.bar(
        long_df,
        x="share",
        y="domain",
        color="signal",
        barmode="group",
        orientation="h",
        title="Coverage vs demand",
    )
    fig.update_xaxes(tickformat=".0%")
    fig.update_yaxes(autorange="reversed")
    return apply_theme(fig, 480)
|
|
|
|
def treemap_risk(df: pd.DataFrame) -> go.Figure:
    """Draw a treemap of the first 80 rows, sized by ``n`` and coloured by risk.

    The hierarchy is built from whichever of ``domain``, ``scenario_type``
    and ``difficulty`` exist in ``df``, in that order.

    Args:
        df: Rows holding ``risk_score``, ``n`` and at least one hierarchy
            column.

    Returns:
        The themed treemap, or the placeholder chart when ``df`` is empty,
        lacks ``risk_score`` or ``n``, or has no hierarchy column.
    """
    # "n" is passed to px.treemap as ``values``, so it is required alongside
    # the colour column; without it the px call would raise instead of
    # falling back to the placeholder.
    if df.empty or not {"risk_score", "n"}.issubset(df.columns):
        return empty_chart()
    path = [c for c in ["domain", "scenario_type", "difficulty"] if c in df.columns]
    if not path:
        return empty_chart()
    fig = px.treemap(
        df.head(80),
        path=path,
        values="n",
        color="risk_score",
        color_continuous_scale="Reds",
        title="Risk surface",
    )
    return apply_theme(fig, 540)
|
|
|
|
def rank_distribution(retrieval_df: pd.DataFrame) -> go.Figure:
    """Plot the mean of ``is_relevant`` at each retrieval rank as a bar chart.

    ``rank`` is coerced to numeric on a copy, with unparseable values
    becoming NaN. The number of rows behind each bar is included in the
    hover data as ``n``. Returns the placeholder chart when
    ``retrieval_df`` is empty or lacks ``rank`` or ``is_relevant``.
    """
    if retrieval_df.empty:
        return empty_chart()
    if not {"rank", "is_relevant"}.issubset(retrieval_df.columns):
        return empty_chart()

    numeric_rank = pd.to_numeric(retrieval_df["rank"], errors="coerce")
    ranked = retrieval_df.assign(rank=numeric_rank)
    per_rank = ranked.groupby("rank").agg(
        relevant_rate=("is_relevant", "mean"),
        n=("is_relevant", "size"),
    )
    per_rank = per_rank.reset_index()

    fig = px.bar(
        per_rank,
        x="rank",
        y="relevant_rate",
        hover_data=["n"],
        title="Relevant rate by rank",
    )
    fig.update_yaxes(tickformat=".0%")
    return apply_theme(fig, 440)
|
|