Spaces:

huggingface
/

transformers-ci-example

Running

App Files Files Community

transformers-ci-example / app.py

tarekziade HF Staff

fix link

8437436 9 days ago

Raw

History Blame Contribute Delete

7.98 kB

	"""Transformers CI — most common test failures.

	A tiny Gradio dashboard over the public `transformers-ci-telemetry` bucket
	(daily-partitioned Parquet produced by the CI telemetry publisher). It ranks
	the tests and exception types that fail most often, with a few headline stats.

	Data location: set ``TELEMETRY_DIR`` to the bucket mount. We otherwise probe a
	short list of common paths (the Space's bucket mount, the local checkout) and
	use the first one that actually contains a ``daily/`` tree.
	"""

	from __future__ import annotations

	import glob
	import os
	from typing import Any

	import gradio as gr
	import pandas as pd

	# Places the bucket contents may live, highest priority first. The Space
	# mounts the bucket at a configured path; locally it is the synced checkout.
	_CANDIDATE_DIRS = [
	    os.getenv("TELEMETRY_DIR", ""),  # explicit override; "" when unset
	    "/data/transformers-ci-telemetry",
	    "/data",
	    "/bucket",
	    os.path.join(os.path.dirname(__file__), "data"),  # data/ next to this file
	    "/Users/tarek/Dev/transformers-ci-telemetry",
	]


	def _telemetry_dir() -> str | None:
	    """Return the first usable telemetry directory, or ``None``.

	    A candidate from ``_CANDIDATE_DIRS`` is usable when it is a non-empty
	    string and ``<candidate>/daily/*/test_rows.parquet`` matches at least
	    one path. Candidates are tried in list order.
	    """
	    partition_glob = os.path.join("daily", "*", "test_rows.parquet")
	    usable = (
	        root
	        for root in _CANDIDATE_DIRS
	        if root and glob.glob(os.path.join(root, partition_glob))
	    )
	    # The generator is lazy, so probing stops at the first match.
	    return next(usable, None)


	def load_test_rows() -> pd.DataFrame:
	    """Read every ``daily/*/test_rows.parquet`` partition into one frame.

	    Partitions are read in sorted path order and concatenated with a fresh
	    index. An empty frame is returned when no telemetry directory is found
	    or when no partition could be read.
	    """
	    root = _telemetry_dir()
	    if root is None:
	        return pd.DataFrame()

	    pattern = os.path.join(root, "daily", "*", "test_rows.parquet")
	    loaded = []
	    for parquet_path in sorted(glob.glob(pattern)):
	        try:
	            part = pd.read_parquet(parquet_path)
	        except Exception:  # noqa: BLE001 - skip a corrupt/partial partition
	            continue
	        loaded.append(part)

	    return pd.concat(loaded, ignore_index=True) if loaded else pd.DataFrame()


	def _summary_md(df: pd.DataFrame) -> str:
	    """Build the headline Markdown shown above the tables.

	    For a non-empty frame this is a one-line summary: row count, number of
	    distinct ``run_id`` and ``date`` values, number of rows whose
	    ``status_code`` is ``"ERROR"``, and that number as a percentage of all
	    rows. For an empty frame it is a "no data" notice instead.
	    """
	    if not df.empty:
	        n_rows = len(df)
	        n_failed = int(df["status_code"].eq("ERROR").sum())
	        pct = (n_failed / n_rows * 100) if n_rows else 0.0
	        n_runs = df["run_id"].nunique()
	        n_days = df["date"].nunique()
	        counts = f"{n_rows} test executions across {n_runs} run(s) / {n_days} day(s) · "
	        return counts + f"{n_failed} failures · {pct:.1f}% failure rate"

	    return (
	        "### No data found\n\n"
	        "No `daily/*/test_rows.parquet` under any known bucket path. "
	        "Set `TELEMETRY_DIR` to the mounted bucket."
	    )


	def _top_failing_tests(df: pd.DataFrame, limit: int = 20) -> pd.DataFrame:
	    """Rank tests by their number of ``ERROR`` rows.

	    Args:
	        df: Telemetry rows; the ``status_code`` and ``test_nodeid`` columns
	            are the only ones read.
	        limit: Maximum number of tests to return.

	    Returns:
	        A frame with ``test_nodeid`` and ``failures`` columns, sorted by
	        ``failures`` in descending order. The frame is empty when ``df`` is
	        empty, lacks either required column, or has no ``ERROR`` rows.
	    """
	    result_columns = ["test_nodeid", "failures"]
	    # Tolerate a frame without the needed columns (as _error_rows does)
	    # instead of raising KeyError and failing the whole dashboard refresh.
	    if df.empty or "status_code" not in df or "test_nodeid" not in df:
	        return pd.DataFrame(columns=result_columns)

	    errors = df[df["status_code"] == "ERROR"]
	    if errors.empty:
	        return pd.DataFrame(columns=result_columns)

	    return (
	        errors.groupby("test_nodeid")
	        .size()
	        .reset_index(name="failures")
	        .sort_values("failures", ascending=False)
	        .head(limit)
	        .reset_index(drop=True)
	    )


	def _failures_by(df: pd.DataFrame, column: str, label: str) -> pd.DataFrame:
	    """Count ``ERROR`` rows per distinct value of ``column``.

	    Args:
	        df: Telemetry rows; ``status_code`` selects the failures.
	        column: Column to group the failures by.
	        label: Name given to the grouping column in the result.

	    Returns:
	        A frame with ``label`` and ``failures`` columns, sorted by
	        ``failures`` in descending order. Missing or empty-string values
	        are reported as ``"(none)"``. The frame is empty when ``df`` is
	        empty, has no ``status_code`` column, or has no ``ERROR`` rows.
	    """
	    cols = [label, "failures"]
	    # Without a status column no row can be identified as a failure; return
	    # the empty table (as _error_rows does) rather than raising KeyError.
	    if df.empty or "status_code" not in df:
	        return pd.DataFrame(columns=cols)

	    errors = df[df["status_code"] == "ERROR"].copy()
	    if errors.empty:
	        return pd.DataFrame(columns=cols)

	    if column in errors:
	        errors[column] = errors[column].fillna("").replace("", "(none)")
	    else:
	        # An absent column is treated like a column of missing values.
	        errors[column] = "(none)"

	    out = (
	        errors.groupby(column)
	        .size()
	        .reset_index(name="failures")
	        .sort_values("failures", ascending=False)
	        .reset_index(drop=True)
	    )
	    return out.rename(columns={column: label})


	def _error_rows(df: pd.DataFrame, limit: int = 100) -> tuple[pd.DataFrame, list[dict[str, Any]]]:
	    """Select up to ``limit`` ``ERROR`` rows for the table and detail view.

	    Rows are sorted by ``date`` in descending order when that column
	    exists. Columns that are needed but absent are added as empty strings,
	    and missing values are replaced by empty strings.

	    Returns:
	        A pair ``(table, details)``. ``table`` holds the ``date``,
	        ``test_nodeid``, ``exception_type`` and ``exception_message``
	        columns. ``details`` is one dict per table row, in the same order,
	        with those fields plus ``exception_stacktrace``, ``run_id``,
	        ``test_job``, ``model`` and ``gpu``. Both are empty when ``df`` is
	        empty, has no ``status_code`` column, or has no ``ERROR`` rows.
	    """
	    table_cols = ["date", "test_nodeid", "exception_type", "exception_message"]
	    extra_cols = ["exception_stacktrace", "run_id", "test_job", "model", "gpu"]

	    has_status = not df.empty and "status_code" in df
	    failed = df[df["status_code"] == "ERROR"].copy() if has_status else None
	    if failed is None or failed.empty:
	        return pd.DataFrame(columns=table_cols), []

	    if "date" in failed:
	        failed = failed.sort_values("date", ascending=False)
	    failed = failed.head(limit).reset_index(drop=True)

	    wanted = table_cols + extra_cols
	    absent = [name for name in wanted if name not in failed]
	    for name in absent:
	        failed[name] = ""

	    table = failed[table_cols].fillna("")
	    records = failed[wanted].fillna("").to_dict("records")
	    return table, records


	def _error_stacktrace(details: list[dict[str, Any]] | None, evt: gr.SelectData) -> str:
	    """Render the text shown in the stacktrace box for the selected row.

	    Args:
	        details: Detail records, one dict per table row.
	        evt: Selection event. ``evt.index`` is used as the row index; when
	            it is a list or tuple, its first element is used.

	    Returns:
	        A header (test node id, exception type and message, date/run/job,
	        model/gpu) followed by a blank line and the stacktrace. The
	        exception message, or a fixed note, stands in for a missing
	        stacktrace. A placeholder prompt is returned when there are no
	        details or the index does not resolve to a record.
	    """
	    placeholder = "Select an error row to see its full stacktrace."
	    if not details:
	        return placeholder

	    selected = evt.index
	    if isinstance(selected, (list, tuple)):
	        selected = selected[0]
	    try:
	        record = details[int(selected)]
	    except (TypeError, ValueError, IndexError):
	        return placeholder

	    message = str(record.get("exception_message") or "").strip()
	    trace = str(record.get("exception_stacktrace") or "").strip()
	    trace = trace or message or "(no stacktrace recorded)"

	    header_lines = [
	        str(record.get("test_nodeid") or "").strip(),
	        f"{record.get('exception_type')}: {message}".strip(": "),
	        f"date={record.get('date')} run_id={record.get('run_id')} job={record.get('test_job')}",
	        f"model={record.get('model') or '(none)'} gpu={record.get('gpu') or '(none)'}",
	    ]
	    # Drop header lines that came out empty.
	    header = "\n".join(filter(None, header_lines))
	    if not header:
	        return trace
	    return f"{header}\n\n{trace}"


	def refresh():
	    """Reload the telemetry and compute every value the dashboard shows.

	    Returns an 8-tuple, in this order: summary Markdown, bar-plot frame,
	    top failing tests, failures by exception type, failures by model, the
	    error table, the per-row error details, and the placeholder text for
	    the stacktrace box.
	    """
	    rows = load_test_rows()
	    ranked = _top_failing_tests(rows)
	    per_type = _failures_by(rows, "exception_type", "exception_type")
	    per_model = _failures_by(rows, "model", "model")
	    errors_table, errors_detail = _error_rows(rows)

	    # BarPlot wants a tidy frame: take the ten highest-ranked tests and keep
	    # only the last "::" segment of the node id so axis labels stay short.
	    chart = ranked.head(10).copy()
	    if chart.empty:
	        chart = pd.DataFrame({"test": [], "failures": []})
	    else:
	        chart["test"] = chart["test_nodeid"].str.split("::").str[-1]

	    return (
	        _summary_md(rows),
	        chart,
	        ranked,
	        per_type,
	        per_model,
	        errors_table,
	        errors_detail,
	        "Select an error row to see its full stacktrace.",
	    )


	# Dashboard layout and event wiring. Components are declared in the order
	# they appear inside the Blocks context, so statement order matters here.
	with gr.Blocks(title="Transformers CI — common failures") as demo:
	    gr.Markdown("# ⚡ Transformers CI — most common test failures")
	    gr.Markdown(
	        "Built on the public "
	        "[`transformers-ci-telemetry`](https://huggingface.co/buckets/huggingface/transformers-ci-telemetry) "
	        "bucket — CI test telemetry, refreshed hourly."
	    )
	    # Headline stats; filled with the Markdown string returned by refresh().
	    summary = gr.Markdown()
	    refresh_btn = gr.Button("↻ Refresh", variant="secondary")

	    gr.Markdown("## Top failing tests")
	    # x/y name the columns of the plot frame that refresh() builds.
	    fail_plot = gr.BarPlot(
	        x="test", y="failures", title="Failures by test (top 10)", height=320
	    )
	    top_tests_tbl = gr.Dataframe(label="Top failing tests", interactive=False)

	    # The two breakdown tables are declared together inside one Row.
	    with gr.Row():
	        by_type_tbl = gr.Dataframe(label="Failures by exception type", interactive=False)
	        by_model_tbl = gr.Dataframe(label="Failures by model", interactive=False)

	    gr.Markdown("## Recent errors")
	    # Carries the per-row detail records from refresh() to the row-select
	    # handler, so the stacktrace can be looked up by row index.
	    error_details_state = gr.State([])
	    error_rows_tbl = gr.Dataframe(label="Errors", interactive=False, wrap=True)
	    stacktrace_box = gr.Code(
	        label="Full stacktrace",
	        language="python",
	        interactive=False,
	        lines=24,
	    )

	    # Must stay in the same order as the tuple returned by refresh().
	    outputs = [
	        summary,
	        fail_plot,
	        top_tests_tbl,
	        by_type_tbl,
	        by_model_tbl,
	        error_rows_tbl,
	        error_details_state,
	        stacktrace_box,
	    ]
	    # refresh() runs both on the button click and on the Blocks load event.
	    refresh_btn.click(refresh, outputs=outputs)
	    demo.load(refresh, outputs=outputs)
	    # Selecting a row in the errors table renders that row's stacktrace.
	    error_rows_tbl.select(
	        _error_stacktrace,
	        inputs=error_details_state,
	        outputs=stacktrace_box,
	    )


	# Launch the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()