Spaces:

huggingface
/

transformers-ci-example

Running

File size: 7,981 Bytes

"""Transformers CI — most common test failures.

A tiny Gradio dashboard over the public `transformers-ci-telemetry` bucket
(daily-partitioned Parquet produced by the CI telemetry publisher). It ranks
the tests and exception types that fail most often, with a few headline stats.

Data location: set ``TELEMETRY_DIR`` to the bucket mount. We otherwise probe a
short list of common paths (the Space's bucket mount, the local checkout) and
use the first one that actually contains a ``daily/`` tree.
"""

from __future__ import annotations

import glob
import os
from typing import Any

import gradio as gr
import pandas as pd

# Candidate locations for the bucket contents, in priority order. The Space
# mounts the bucket at a configured path; locally it's the synced checkout.
# ``_telemetry_dir`` skips falsy entries, so an unset ``TELEMETRY_DIR`` (which
# yields "" here) is simply ignored rather than probed.
_CANDIDATE_DIRS = [
    # Explicit override; read once, when this module is imported.
    os.environ.get("TELEMETRY_DIR", ""),
    "/data/transformers-ci-telemetry",
    "/data",
    "/bucket",
    # A ``data/`` directory sitting next to this file.
    os.path.join(os.path.dirname(__file__), "data"),
    # NOTE(review): absolute path on one developer's machine — presumably a
    # local-dev convenience; confirm whether it should live in TELEMETRY_DIR
    # instead of the shared source.
    "/Users/tarek/Dev/transformers-ci-telemetry",
]


def _telemetry_dir() -> str | None:
    """Return the highest-priority candidate holding at least one partition.

    A candidate qualifies when ``<candidate>/daily/*/test_rows.parquet``
    matches at least one file. Empty candidate strings are never probed.
    Returns ``None`` when no candidate qualifies.
    """
    populated = (
        root
        for root in _CANDIDATE_DIRS
        if root and glob.glob(os.path.join(root, "daily", "*", "test_rows.parquet"))
    )
    # The generator is lazy, so probing stops at the first hit.
    return next(populated, None)


def load_test_rows() -> pd.DataFrame:
    """Load all daily partitions into a single frame.

    Reads every ``daily/*/test_rows.parquet`` under the telemetry directory
    in sorted path order and concatenates them with a fresh index. Returns an
    empty frame when no telemetry directory is found or nothing is readable.
    """
    root = _telemetry_dir()
    if root is None:
        return pd.DataFrame()

    pattern = os.path.join(root, "daily", "*", "test_rows.parquet")
    partitions = []
    for parquet_path in sorted(glob.glob(pattern)):
        try:
            partition = pd.read_parquet(parquet_path)
        except Exception:  # noqa: BLE001 - skip a corrupt/partial partition
            # Best effort: one unreadable day must not take the dashboard down.
            continue
        partitions.append(partition)

    return pd.concat(partitions, ignore_index=True) if partitions else pd.DataFrame()


def _summary_md(df: pd.DataFrame) -> str:
    """Render the headline statistics as a Markdown string.

    Args:
        df: Test rows; one row per test execution.

    Returns:
        A "no data" notice when ``df`` is empty; otherwise a one-line summary
        with the number of executions, distinct ``run_id`` values, distinct
        ``date`` values, rows whose ``status_code`` is ``"ERROR"``, and the
        resulting failure rate.

    A column that is absent from ``df`` contributes a count of 0 instead of
    raising ``KeyError``, matching how ``_error_rows`` tolerates a frame
    without ``status_code``.
    """
    if df.empty:
        return (
            "### No data found\n\n"
            "No `daily/*/test_rows.parquet` under any known bucket path. "
            "Set `TELEMETRY_DIR` to the mounted bucket."
        )

    total = len(df)
    failures = int((df["status_code"] == "ERROR").sum()) if "status_code" in df else 0
    runs = df["run_id"].nunique() if "run_id" in df else 0
    days = df["date"].nunique() if "date" in df else 0
    # ``total`` is non-zero here (an empty frame returned above); the guard
    # only keeps the division safe if that ever changes.
    rate = (failures / total * 100) if total else 0.0
    return (
        f"**{total}** test executions across **{runs}** run(s) / **{days}** day(s)  ·  "
        f"**{failures}** failures  ·  **{rate:.1f}%** failure rate"
    )


def _top_failing_tests(df: pd.DataFrame, limit: int = 20) -> pd.DataFrame:
    """Rank tests by how many ``"ERROR"`` rows they have.

    Args:
        df: Test rows with ``status_code`` and ``test_nodeid`` columns.
        limit: Maximum number of tests to return.

    Returns:
        A frame with columns ``test_nodeid`` and ``failures``, most failures
        first. It is empty (same columns) when ``df`` is empty, has no error
        rows, or lacks either required column.
    """
    columns = ["test_nodeid", "failures"]
    # Mirror the guard in ``_error_rows``: a frame without the columns we need
    # yields an empty table rather than a KeyError.
    if df.empty or "status_code" not in df or "test_nodeid" not in df:
        return pd.DataFrame(columns=columns)

    errors = df[df["status_code"] == "ERROR"]
    if errors.empty:
        return pd.DataFrame(columns=columns)

    return (
        errors.groupby("test_nodeid")
        .size()
        .reset_index(name="failures")
        # groupby emits node ids in sorted order; a stable sort keeps that
        # order among ties, so the top-N cut is deterministic across refreshes.
        .sort_values("failures", ascending=False, kind="stable")
        .head(limit)
        .reset_index(drop=True)
    )


def _failures_by(df: pd.DataFrame, column: str, label: str) -> pd.DataFrame:
    """Count ``"ERROR"`` rows per distinct value of ``column``.

    Args:
        df: Test rows with a ``status_code`` column.
        column: Column to group the error rows by.
        label: Name given to the grouping column in the result.

    Returns:
        A frame with columns ``label`` and ``failures``, most failures first.
        Missing and empty-string values are counted together as ``"(none)"``;
        if ``column`` is absent altogether, every error row falls in that
        bucket. The result is empty (same columns) when ``df`` is empty, has
        no ``status_code`` column, or has no error rows.
    """
    cols = [label, "failures"]
    if df.empty or "status_code" not in df:
        return pd.DataFrame(columns=cols)

    errors = df[df["status_code"] == "ERROR"]
    if errors.empty:
        return pd.DataFrame(columns=cols)

    if column in errors:
        # Work on an object-dtype copy so the placeholder can be written
        # regardless of the column's stored dtype.
        values = errors[column].astype(object)
        values = values.where(values.notna() & (values != ""), "(none)")
    else:
        values = pd.Series("(none)", index=errors.index, dtype=object)

    return (
        values.groupby(values)
        .size()
        .rename_axis(label)
        .reset_index(name="failures")
        # Stable sort: ties stay in the sorted key order groupby produced.
        .sort_values("failures", ascending=False, kind="stable")
        .reset_index(drop=True)
    )


def _error_rows(df: pd.DataFrame, limit: int = 100) -> tuple[pd.DataFrame, list[dict[str, Any]]]:
    """Build the "recent errors" table and its per-row detail records.

    Keeps rows whose ``status_code`` is ``"ERROR"``, orders them by ``date``
    descending when that column exists, and truncates to ``limit`` rows.

    Returns:
        ``(table, details)``. ``table`` holds the four displayed columns;
        ``details`` is a list of dicts, one per table row in the same order,
        that additionally carries the stacktrace and run metadata. Columns
        absent from ``df`` are filled with ``""`` and missing values are
        replaced with ``""``. Both are empty when ``df`` is empty, has no
        ``status_code`` column, or contains no error rows.
    """
    visible = ["date", "test_nodeid", "exception_type", "exception_message"]
    if df.empty or "status_code" not in df:
        return pd.DataFrame(columns=visible), []

    failed = df.loc[df["status_code"] == "ERROR"]
    if failed.empty:
        return pd.DataFrame(columns=visible), []

    if "date" in failed:
        failed = failed.sort_values("date", ascending=False)
    recent = failed.head(limit).reset_index(drop=True)

    extended = visible + ["exception_stacktrace", "run_id", "test_job", "model", "gpu"]
    # Older partitions may lack some columns; add them blank so the selections
    # below never fail.
    absent = [name for name in extended if name not in recent]
    recent = recent.assign(**dict.fromkeys(absent, ""))

    records = recent[extended].fillna("").to_dict("records")
    table = recent[visible].fillna("")
    return table, records


def _error_stacktrace(details: list[dict[str, Any]] | None, evt: gr.SelectData) -> str:
    """Format the detail record for the selected error row as plain text.

    Args:
        details: Detail records aligned with the rows of the errors table.
        evt: Selection event; ``evt.index`` is either a row number or a
            list/tuple whose first item is the row number.

    Returns:
        A header (test id, exception summary, run metadata) followed by a
        blank line and the stacktrace. The stacktrace falls back to the
        exception message, then to a fixed placeholder. A prompt string is
        returned when there are no details or the index cannot be resolved.
    """
    prompt = "Select an error row to see its full stacktrace."
    if not details:
        return prompt

    selected = evt.index
    if isinstance(selected, (list, tuple)):
        selected = selected[0]
    try:
        record = details[int(selected)]
    except (TypeError, ValueError, IndexError):
        return prompt

    def text(key: str) -> str:
        # Missing or falsy fields render as an empty string.
        return str(record.get(key) or "").strip()

    message = text("exception_message")
    body = text("exception_stacktrace") or message or "(no stacktrace recorded)"

    candidates = (
        text("test_nodeid"),
        f"{record.get('exception_type')}: {message}".strip(": "),
        f"date={record.get('date')} run_id={record.get('run_id')} job={record.get('test_job')}",
        f"model={record.get('model') or '(none)'} gpu={record.get('gpu') or '(none)'}",
    )
    # Drop blank header lines.
    header = "\n".join(filter(None, candidates))
    if not header:
        return body
    return f"{header}\n\n{body}"


def refresh():
    """Reload the telemetry and compute a value for every dashboard output.

    Returns an 8-tuple, in this order: summary Markdown, bar-plot frame,
    top-failing-tests table, failures by exception type, failures by model,
    recent-errors table, the per-row detail records, and the prompt text
    shown in the stacktrace box.
    """
    rows = load_test_rows()
    ranked_tests = _top_failing_tests(rows)
    per_exception = _failures_by(rows, "exception_type", "exception_type")
    per_model = _failures_by(rows, "model", "model")
    recent_errors, recent_details = _error_rows(rows)

    # The bar plot takes a tidy frame: reuse the ten worst tests and label each
    # bar with the last ``::`` segment of the node id so the axis stays legible.
    chart = ranked_tests.head(10).copy()
    if chart.empty:
        chart = pd.DataFrame({"test": [], "failures": []})
    else:
        chart["test"] = chart["test_nodeid"].str.split("::").str[-1]

    prompt = "Select an error row to see its full stacktrace."
    return (
        _summary_md(rows),
        chart,
        ranked_tests,
        per_exception,
        per_model,
        recent_errors,
        recent_details,
        prompt,
    )


# Dashboard layout and event wiring. Components are created inside the
# ``gr.Blocks`` context and then bound to ``refresh`` / ``_error_stacktrace``.
with gr.Blocks(title="Transformers CI — common failures") as demo:
    gr.Markdown("# ⚡ Transformers CI — most common test failures")
    gr.Markdown(
        "Built on the public "
        "[`transformers-ci-telemetry`](https://huggingface.co/buckets/huggingface/transformers-ci-telemetry) "
        "bucket — CI test telemetry, refreshed hourly."
    )
    # Headline stats; receives the Markdown string returned by ``refresh``.
    summary = gr.Markdown()
    refresh_btn = gr.Button("↻ Refresh", variant="secondary")

    gr.Markdown("## Top failing tests")
    # ``x``/``y`` name the columns of the plot frame that ``refresh`` builds.
    fail_plot = gr.BarPlot(
        x="test", y="failures", title="Failures by test (top 10)", height=320
    )
    top_tests_tbl = gr.Dataframe(label="Top failing tests", interactive=False)

    with gr.Row():
        by_type_tbl = gr.Dataframe(label="Failures by exception type", interactive=False)
        by_model_tbl = gr.Dataframe(label="Failures by model", interactive=False)

    gr.Markdown("## Recent errors")
    # Holds the detail records (second value from ``_error_rows``) so the
    # select handler can look up the stacktrace for the clicked row.
    error_details_state = gr.State([])
    error_rows_tbl = gr.Dataframe(label="Errors", interactive=False, wrap=True)
    stacktrace_box = gr.Code(
        label="Full stacktrace",
        language="python",
        interactive=False,
        lines=24,
    )

    # Order must match the tuple returned by ``refresh`` element for element.
    outputs = [
        summary,
        fail_plot,
        top_tests_tbl,
        by_type_tbl,
        by_model_tbl,
        error_rows_tbl,
        error_details_state,
        stacktrace_box,
    ]
    # The same ``refresh`` call backs both the button and the ``demo.load`` event.
    refresh_btn.click(refresh, outputs=outputs)
    demo.load(refresh, outputs=outputs)
    # Selecting a row in the errors table renders that row's stacktrace; the
    # handler receives the stored detail records as its input.
    error_rows_tbl.select(
        _error_stacktrace,
        inputs=error_details_state,
        outputs=stacktrace_box,
    )


# Start the Gradio app only when this file is executed as a script; importing
# the module just builds ``demo``.
if __name__ == "__main__":
    demo.launch()