tarekziade's picture
tarekziade HF Staff
fix link
8437436
Raw
History Blame Contribute Delete
7.98 kB
"""Transformers CI — most common test failures.
A tiny Gradio dashboard over the public `transformers-ci-telemetry` bucket
(daily-partitioned Parquet produced by the CI telemetry publisher). It ranks
the tests and exception types that fail most often, with a few headline stats.
Data location: set ``TELEMETRY_DIR`` to the bucket mount. We otherwise probe a
short list of common paths (the Space's bucket mount, the local checkout) and
use the first one that actually contains a ``daily/`` tree.
"""
from __future__ import annotations
import glob
import os
from typing import Any
import gradio as gr
import pandas as pd
# Places to look for the bucket contents, highest priority first. On the
# Space the bucket is mounted at a configured path; on a developer machine it
# is the synced checkout.
_CANDIDATE_DIRS = [
    # Explicit override; an empty string (variable unset) is skipped later.
    os.getenv("TELEMETRY_DIR", ""),
    "/data/transformers-ci-telemetry",
    "/data",
    "/bucket",
    # A ``data`` directory sitting next to this file.
    os.path.join(os.path.dirname(__file__), "data"),
    "/Users/tarek/Dev/transformers-ci-telemetry",
]
def _telemetry_dir() -> str | None:
    """Return the first candidate directory that holds daily partitions.

    A candidate qualifies when at least one
    ``daily/<partition>/test_rows.parquet`` file exists beneath it. Empty
    entries (such as an unset ``TELEMETRY_DIR``) are ignored. ``None`` is
    returned when no candidate qualifies.
    """
    partition_glob = os.path.join("daily", "*", "test_rows.parquet")
    usable = (
        root
        for root in _CANDIDATE_DIRS
        if root and glob.glob(os.path.join(root, partition_glob))
    )
    # Candidates are tried lazily, so probing stops at the first match.
    return next(usable, None)
def load_test_rows() -> pd.DataFrame:
    """Concatenate every ``daily/*/test_rows.parquet`` into one frame.

    Partitions are read in sorted path order. A partition that cannot be read
    is skipped, so one corrupt or partially written file does not take the
    whole dashboard down, but the skip is logged so the resulting undercount
    is not invisible.

    Returns:
        The concatenated rows with a fresh index, or an empty frame when no
        telemetry directory is found or no partition could be read.
    """
    import logging  # local import keeps this function self-contained

    base = _telemetry_dir()
    if base is None:
        return pd.DataFrame()

    logger = logging.getLogger(__name__)
    files = sorted(glob.glob(os.path.join(base, "daily", "*", "test_rows.parquet")))
    frames = []
    for path in files:
        try:
            frames.append(pd.read_parquet(path))
        except Exception:  # noqa: BLE001 - skip a corrupt/partial partition
            # Best effort by design: keep going, but leave a trace.
            logger.warning("Skipping unreadable partition %s", path, exc_info=True)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
def _summary_md(df: pd.DataFrame) -> str:
    """Render the headline stats shown at the top of the dashboard.

    Args:
        df: Test rows. The ``status_code``, ``run_id`` and ``date`` columns
            are used when present.

    Returns:
        Markdown text: a "No data found" notice for an empty frame, otherwise
        the execution, run, day and failure counts plus the failure rate. A
        missing column contributes a count of 0 rather than raising
        ``KeyError``, matching how ``_error_rows`` tolerates absent columns.
    """
    if df.empty:
        return (
            "### No data found\n\n"
            "No `daily/*/test_rows.parquet` under any known bucket path. "
            "Set `TELEMETRY_DIR` to the mounted bucket."
        )
    total = len(df)
    # A row counts as a failure when its status code is exactly "ERROR".
    failures = int((df["status_code"] == "ERROR").sum()) if "status_code" in df else 0
    rate = (failures / total * 100) if total else 0.0
    runs = df["run_id"].nunique() if "run_id" in df else 0
    days = df["date"].nunique() if "date" in df else 0
    return (
        f"**{total}** test executions across **{runs}** run(s) / **{days}** day(s) · "
        f"**{failures}** failures · **{rate:.1f}%** failure rate"
    )
def _top_failing_tests(df: pd.DataFrame, limit: int = 20) -> pd.DataFrame:
    """Rank tests by how many ``ERROR`` rows they have.

    Args:
        df: Test rows; needs ``status_code`` and ``test_nodeid`` columns.
        limit: Maximum number of tests to return.

    Returns:
        A frame with ``test_nodeid`` and ``failures`` columns, most failures
        first. It is empty (same columns) when there are no rows, no
        ``ERROR`` rows, or one of the required columns is missing.
    """
    empty = pd.DataFrame(columns=["test_nodeid", "failures"])
    # Tolerate a missing column the same way ``_error_rows`` does.
    if df.empty or "status_code" not in df or "test_nodeid" not in df:
        return empty
    errors = df[df["status_code"] == "ERROR"]
    if errors.empty:
        return empty
    return (
        errors.groupby("test_nodeid")
        .size()
        .reset_index(name="failures")
        # Stable sort: tests with equal counts keep groupby's key order.
        .sort_values("failures", ascending=False, kind="stable")
        .head(limit)
        .reset_index(drop=True)
    )
def _failures_by(df: pd.DataFrame, column: str, label: str) -> pd.DataFrame:
    """Count ``ERROR`` rows grouped by one column.

    Args:
        df: Test rows; needs a ``status_code`` column.
        column: Column to group the failures by.
        label: Name given to the grouping column in the result.

    Returns:
        A frame with ``label`` and ``failures`` columns, most failures first.
        Missing or empty values are grouped under ``"(none)"``; if ``column``
        is absent altogether every failure lands in that group. The result is
        empty (same columns) when there are no rows, no ``ERROR`` rows, or no
        ``status_code`` column.
    """
    cols = [label, "failures"]
    if df.empty or "status_code" not in df:
        return pd.DataFrame(columns=cols)
    # Copy so the placeholder assignment below never touches the caller's frame.
    errors = df[df["status_code"] == "ERROR"].copy()
    if errors.empty:
        return pd.DataFrame(columns=cols)
    if column in errors:
        errors[column] = errors[column].fillna("").replace("", "(none)")
    else:
        # Optional columns may be absent from the data; treat that as "no value".
        errors[column] = "(none)"
    out = (
        errors.groupby(column)
        .size()
        .reset_index(name="failures")
        # Stable sort: groups with equal counts keep groupby's key order.
        .sort_values("failures", ascending=False, kind="stable")
        .reset_index(drop=True)
    )
    return out.rename(columns={column: label})
def _error_rows(df: pd.DataFrame, limit: int = 100) -> tuple[pd.DataFrame, list[dict[str, Any]]]:
    """Pick the error rows to list, plus a detail record for each one.

    Rows whose ``status_code`` is ``"ERROR"`` are kept, ordered by ``date``
    descending when that column exists, and truncated to ``limit``.

    Returns:
        A pair ``(table, details)``. ``table`` holds the four display columns
        with missing values shown as ``""``. ``details`` is a list of dicts,
        one per table row in the same order, carrying the display columns plus
        the stacktrace, run id, job, model and gpu (``""`` where absent).
        Both are empty when there is nothing to show.
    """
    table_cols = ["date", "test_nodeid", "exception_type", "exception_message"]
    all_cols = table_cols + ["exception_stacktrace", "run_id", "test_job", "model", "gpu"]

    if df.empty or "status_code" not in df:
        return pd.DataFrame(columns=table_cols), []

    is_error = df["status_code"] == "ERROR"
    failed = df[is_error].copy()
    if failed.empty:
        return pd.DataFrame(columns=table_cols), []

    if "date" in failed:
        failed = failed.sort_values("date", ascending=False)
    failed = failed.head(limit).reset_index(drop=True)

    # Back-fill any column the data lacks so both selections below succeed.
    for name in all_cols:
        if name not in failed:
            failed[name] = ""

    table = failed[table_cols].fillna("")
    records = failed[all_cols].fillna("").to_dict("records")
    return table, records
def _error_stacktrace(details: list[dict[str, Any]] | None, evt: gr.SelectData) -> str:
    """Build the text shown in the stacktrace box for the selected error row.

    Args:
        details: Per-row detail dicts, in the same order as the errors table.
        evt: Selection event; ``evt.index`` is either a row index or a
            sequence whose first item is the row index.

    Returns:
        A short header (test id, exception, run info, model/gpu) followed by
        the stacktrace, falling back to the exception message and then to a
        fixed notice when no stacktrace is recorded. A placeholder prompt is
        returned when there are no details or the index cannot be resolved.
    """
    placeholder = "Select an error row to see its full stacktrace."
    if not details:
        return placeholder

    selected = evt.index
    if isinstance(selected, (list, tuple)):
        selected = selected[0]
    try:
        record = details[int(selected)]
    except (TypeError, ValueError, IndexError):
        return placeholder

    def _clean(key: str) -> str:
        # Missing or falsy values become "", everything else a stripped str.
        return str(record.get(key) or "").strip()

    message = _clean("exception_message")
    body = _clean("exception_stacktrace") or message or "(no stacktrace recorded)"

    header_lines = [
        _clean("test_nodeid"),
        # Stripping ": " drops the separator when the type or message is empty.
        f"{record.get('exception_type')}: {message}".strip(": "),
        f"date={record.get('date')} run_id={record.get('run_id')} job={record.get('test_job')}",
        f"model={record.get('model') or '(none)'} gpu={record.get('gpu') or '(none)'}",
    ]
    header = "\n".join(line for line in header_lines if line)
    if header:
        return f"{header}\n\n{body}"
    return body
def refresh():
    """Reload the telemetry and rebuild every dashboard output.

    Returns:
        An 8-tuple, in order: summary Markdown, bar-plot frame, top failing
        tests, failures by exception type, failures by model, recent error
        rows, the per-row error details, and the stacktrace placeholder text.
    """
    rows = load_test_rows()
    ranked = _top_failing_tests(rows)
    per_exception = _failures_by(rows, "exception_type", "exception_type")
    per_model = _failures_by(rows, "model", "model")
    recent, recent_details = _error_rows(rows)

    # The bar plot takes a tidy frame: the ten top-ranked tests, labelled by
    # the last ``::`` segment of the node id so the axis stays readable.
    chart = ranked.head(10).copy()
    if chart.empty:
        chart = pd.DataFrame({"test": [], "failures": []})
    else:
        chart["test"] = chart["test_nodeid"].str.split("::").str[-1]

    return (
        _summary_md(rows),
        chart,
        ranked,
        per_exception,
        per_model,
        recent,
        recent_details,
        "Select an error row to see its full stacktrace.",
    )
# Dashboard layout and event wiring. Components are created in display order:
# header, summary line, refresh button, top-failing-tests plot and table, the
# two breakdown tables, then the recent-errors table with its stacktrace box.
with gr.Blocks(title="Transformers CI — common failures") as demo:
    gr.Markdown("# ⚡ Transformers CI — most common test failures")
    gr.Markdown(
        "Built on the public "
        "[`transformers-ci-telemetry`](https://huggingface.co/buckets/huggingface/transformers-ci-telemetry) "
        "bucket — CI test telemetry, refreshed hourly."
    )
    # Filled by ``refresh`` with the headline stats (or the no-data notice).
    summary = gr.Markdown()
    refresh_btn = gr.Button("↻ Refresh", variant="secondary")
    gr.Markdown("## Top failing tests")
    # Plots the ``test`` / ``failures`` columns of the frame ``refresh`` builds.
    fail_plot = gr.BarPlot(
        x="test", y="failures", title="Failures by test (top 10)", height=320
    )
    top_tests_tbl = gr.Dataframe(label="Top failing tests", interactive=False)
    # The two breakdown tables share one row.
    with gr.Row():
        by_type_tbl = gr.Dataframe(label="Failures by exception type", interactive=False)
        by_model_tbl = gr.Dataframe(label="Failures by model", interactive=False)
    gr.Markdown("## Recent errors")
    # Keeps the per-row detail dicts from ``refresh`` so the select handler
    # can look up the row behind a clicked cell.
    error_details_state = gr.State([])
    error_rows_tbl = gr.Dataframe(label="Errors", interactive=False, wrap=True)
    stacktrace_box = gr.Code(
        label="Full stacktrace",
        language="python",
        interactive=False,
        lines=24,
    )
    # Order must match the tuple returned by ``refresh``.
    outputs = [
        summary,
        fail_plot,
        top_tests_tbl,
        by_type_tbl,
        by_model_tbl,
        error_rows_tbl,
        error_details_state,
        stacktrace_box,
    ]
    # ``refresh`` runs on the button click and on the app's load event.
    refresh_btn.click(refresh, outputs=outputs)
    demo.load(refresh, outputs=outputs)
    # Selecting a cell in the errors table renders that row's stacktrace.
    error_rows_tbl.select(
        _error_stacktrace,
        inputs=error_details_state,
        outputs=stacktrace_box,
    )
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()