# Initial commit: AutoML Engineer Agent with README, LICENSE, and project files
# (commit 5908c8b — stray web-page header text converted to a comment so the module parses)
"""
Report generator: rich markdown/HTML from pipeline results.
"""
from __future__ import annotations

import base64
import html
import json
from datetime import datetime
from pathlib import Path
from typing import Any
# Brand palette (match evaluate.py / UI)
# "primary" colors headings and the callout border, "secondary" underlines
# section headings; "accent" is not referenced in this module's visible code —
# presumably kept for parity with evaluate.py / the UI (confirm there).
PALETTE = {
    "primary": "#534AB7",
    "secondary": "#1D9E75",
    "accent": "#D85A30",
}
def generate_report(
    objective: str,
    dataset_name: str,
    metrics: dict,
    best_model: str,
    output_dir: str | Path = "outputs",
) -> Path:
    """
    Generate a timestamped markdown report for a pipeline run.

    Args:
        objective: Natural-language description of the modeling goal.
        dataset_name: Human-readable name of the dataset used.
        metrics: Evaluation metrics; serialized as JSON in the report.
        best_model: Name of the best-performing model.
        output_dir: Directory the report is written into (created if missing).

    Returns:
        Path to the written ``report_<timestamp>.md`` file.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_path = output_dir / f"report_{timestamp}.md"
    # Serialize as real JSON so the ```json fence is valid markdown/JSON.
    # str(metrics) would emit a Python repr (single quotes, True/None), which
    # is not JSON. default=str keeps non-serializable values (e.g. numpy
    # scalars) readable instead of raising.
    metrics_json = json.dumps(metrics, indent=2, default=str)
    content = f"""# AutoML Report
**Generated:** {datetime.now().isoformat()}
## Objective
{objective}
## Dataset
{dataset_name}
## Best Model
{best_model}
## Metrics
```json
{metrics_json}
```
---
*Report generated by automl-engineer-agent*
"""
    report_path.write_text(content, encoding="utf-8")
    return report_path
def _dataset_size_label(dp: dict[str, Any]) -> str:
n = int(dp.get("n_rows", 0) or 0)
if n < 1000:
return "Small (< 1000 rows)"
if n <= 10000:
return "Medium"
return "Large (> 10000 rows)"
def _embed_png(path: str) -> str:
try:
p = Path(path)
if not p.exists():
return ""
raw = p.read_bytes()
return base64.b64encode(raw).decode("ascii")
except Exception:
return ""
def _shap_status(result: dict[str, Any]) -> str:
pp = result.get("plot_paths") or {}
ev = result.get("eval") or {}
if ev.get("has_shap") or any(
k in pp for k in ("shap_bar", "shap_summary")
) or any(str(k).startswith("shap_dependence_") for k in pp):
return "Complete"
return "Not available"
def _best_model_tuned_line(result: dict[str, Any]) -> str:
name = result.get("best_model_name") or "—"
tune = result.get("tune") or {}
if tune.get("success"):
return f"{name} (tuned)"
return str(name)
def _tuning_improvement_line(result: dict[str, Any]) -> str:
tune = result.get("tune") or {}
if not tune.get("success"):
return "Not run"
imp = tune.get("improvement")
if imp is None:
return "—"
return f"{float(imp):+.4f}"
def _count_pipeline_steps(result: dict[str, Any]) -> int:
n = 0
if result.get("eda"):
n += 1
if result.get("task"):
n += 1
if result.get("prep"):
n += 1
if result.get("plan"):
n += 1
if result.get("train"):
n += 1
if result.get("tune"):
n += 1
if result.get("eval") or result.get("plot_paths"):
n += 1
return n
def _generate_next_steps(result: dict[str, Any]) -> list[str]:
steps: list[str] = []
eda = result.get("eda") or {}
prep = result.get("prep") or {}
ti = eda.get("target_info") or {}
miss = eda.get("missing", {}).get("by_column", {}) or {}
train_data = result.get("train", {}) or {}
overfit_warnings = list(train_data.get("overfitting_warnings", []))
if eda.get("overview", {}).get("rows", 0) < 500:
steps.append("Collect more rows or use simpler models to reduce variance.")
if overfit_warnings:
steps.append("Reduce model complexity, add regularization, or gather more diverse data.")
if any(info.get("pct", 0) > 30 for info in miss.values()):
steps.append("Impute or drop high-missing columns before retraining.")
if ti.get("imbalance_ratio", 0) and ti.get("imbalance_ratio", 0) > 5:
steps.append("Try class_weight='balanced', SMOTE, or stratified sampling.")
if prep.get("target_leakage_suspicion"):
steps.append("Review columns flagged for possible target leakage.")
tune = result.get("tune") or {}
if tune.get("success"):
imp = float(tune.get("improvement") or 0.0)
if imp < 0.01:
steps.append(
"Hyperparameter tuning showed minimal improvement — consider feature engineering instead."
)
if imp > 0.05:
steps.append(
"Significant improvement from tuning — consider expanding the search space with more trials."
)
if tune.get("overfit"):
steps.append(
"Even after tuning the model overfit — try adding more training data or reducing model complexity."
)
steps.append("Run k-fold cross-validation on the training split to validate stability.")
return steps[:12]
def _build_plan_section_md(plan: dict[str, Any]) -> list[str]:
    """Render the "Training Plan" section of the markdown report as lines."""
    profile = plan.get("dataset_profile") or {}
    section: list[str] = [
        "## 3. Training Plan",
        "",
        "### Dataset profile",
        "",
        "| Field | Value |",
        "|-------|-------|",
        f"| Rows | {profile.get('n_rows', '—')} |",
        f"| Features | {profile.get('n_features', '—')} |",
        f"| Size category | {_dataset_size_label(profile)} |",
        f"| Imbalance ratio | {profile.get('imbalance_ratio', '—')} |",
        "",
        "### Models selected",
        "",
        "| Model | Selected | Reason |",
        "|-------|----------|--------|",
    ]
    skip_reasons = plan.get("skip_reasons") or {}
    # Recommended models first, then skipped ones with their reasons.
    section.extend(
        f"| {model} | Yes | Included in the training plan for this dataset. |"
        for model in plan.get("recommended_models") or []
    )
    section.extend(
        f"| {model} | No | {skip_reasons.get(model, 'Excluded by training plan rules.')} |"
        for model in plan.get("skip_models") or []
    )
    section.extend([
        "",
        "### Primary metric",
        "",
        f"- **Metric:** `{plan.get('primary_metric', '—')}`",
        f"- **Reasoning:** {plan.get('metric_reasoning', '—')}",
        "",
        "### Tuning budget",
        "",
        f"- **Optuna trials:** {plan.get('n_trials', '—')}",
        f"- **Timeout (s):** {plan.get('timeout', '—')}",
        "",
    ])
    plan_warnings = plan.get("warnings") or []
    if plan_warnings:
        section.extend(["### Warnings", ""])
        section.extend(f"- ⚠️ {warning}" for warning in plan_warnings)
        section.append("")
    return section
def _build_tune_section_md(tune: dict[str, Any]) -> list[str]:
if not tune.get("success"):
return [
"## 5. Hyperparameter Tuning",
"",
f"Tuning did not complete successfully: {tune.get('error', 'unknown')}",
"",
]
bp = tune.get("best_params") or {}
lines = [
"## 5. Hyperparameter Tuning",
"",
"| Metric | Value |",
"|--------|-------|",
f"| Baseline score (test) | {tune.get('baseline_score', '—')} |",
f"| Best score after tuning | {tune.get('best_score', '—')} |",
f"| Improvement | {tune.get('improvement', '—')} |",
f"| Trials run | {tune.get('n_trials_run', '—')} |",
f"| Tuning time (s) | {tune.get('tuning_time_s', '—')} |",
f"| Overfit (train–test gap heuristic) | {tune.get('overfit', '—')} |",
"",
"### Best hyperparameters",
"",
"| Parameter | Value |",
"|-----------|-------|",
]
for k, v in sorted(bp.items())[:48]:
lines.append(f"| `{k}` | `{v!r}` |")
lines.append("")
return lines
def _build_markdown(result: dict[str, Any]) -> str:
    """Full markdown report from a pipeline result dict.

    Sections: summary table, overview (with a CV-based selection rationale
    for the best model when available), preprocessing, training plan, model
    comparison, hyperparameter tuning, evaluation metrics/plots, SHAP text,
    and recommended next steps. Returns one newline-joined string ending
    with a single trailing newline.
    """
    # Summary table + start of the Overview section.
    lines: list[str] = [
        "# AutoML pipeline report",
        "",
        "| Field | Value |",
        "|-------|-------|",
        f"| **Best model (tuned)** | {_best_model_tuned_line(result)} |",
        f"| **Tuning improvement** | {_tuning_improvement_line(result)} |",
        f"| **SHAP analysis** | {_shap_status(result)} |",
        f"| **Total pipeline steps** | {_count_pipeline_steps(result)} |",
        f"| **Target** | {result.get('target_col', '—')} |",
        f"| **Task** | {result.get('task_type', '—')} |",
        "",
        "## 1. Overview",
        "",
        f"**Best model:** {result.get('best_model_name', '—')}",
    ]
    tr_ov = result.get("train") or {}
    # Strip any " (tuned)" suffix so the name matches entries in train results.
    _bn = str(result.get("best_model_name", "—")).replace(" (tuned)", "").strip()
    # Fall back to the task's conventional primary metric when none was recorded.
    _pm = tr_ov.get("metric_name") or (
        "roc_auc" if result.get("task_type") == "classification" else "r2"
    )
    # Find the best model's CV stats and explain how it was selected.
    for _r in tr_ov.get("results") or []:
        if _r.get("name") == _bn:
            _cvm = _r.get("cv_mean")
            _cvs = _r.get("cv_std")
            _nf = tr_ov.get("cv_folds_used") or 5
            if _cvm is not None and _cvs is not None:
                lines.append(
                    f"\n**{_bn}** was selected using cross-validated performance: "
                    f"CV mean {float(_cvm):.4f} ± {float(_cvs):.4f} across {_nf} folds. "
                    f"That average reflects how the model scores when validated on different held-out "
                    f"subsets of the training data (primary metric: `{_pm}`)."
                )
            break
    lines.append("")
    # 2. Preprocessing — only emitted if the prep stage produced output.
    prep = result.get("prep") or {}
    if prep:
        lines += [
            "## 2. Preprocessing",
            "",
            f"- **Final feature count:** {prep.get('final_feature_count', '—')}",
            f"- **Train / test size:** {prep.get('train_size', '—')} / {prep.get('test_size', '—')}",
            "",
        ]
    # 3. Training plan (delegated), with a placeholder when absent.
    if result.get("plan"):
        lines += _build_plan_section_md(result["plan"])
    else:
        lines += ["## 3. Training Plan", "", "*Not available for this run.*", ""]
    # 4. Model comparison table.
    cdf = result.get("comparison_df")
    lines += [
        "## 4. Model comparison",
        "",
        "The table includes **CV Mean**, **CV Std**, **CV Train Mean**, and **CV Overfit** when "
        "cross-validation ran; otherwise those cells are empty.",
        "",
    ]
    if cdf is not None:
        try:
            # cdf is presumably a pandas DataFrame (to_markdown needs the
            # optional `tabulate` package) — fall back to str() on failure.
            lines.append(cdf.to_markdown(index=False))
        except Exception:
            lines.append(str(cdf))
    else:
        lines.append("*No comparison table.*")
    lines.append("")
    # 5. Hyperparameter tuning (delegated), with a placeholder when not run.
    if result.get("tune") is not None:
        lines += _build_tune_section_md(result["tune"])
    else:
        lines += ["## 5. Hyperparameter Tuning", "", "*Not run.*", ""]
    # 6. Raw metrics in a plain code fence (Python repr, not JSON).
    lines += [
        "## 6. Evaluation metrics",
        "",
        "```",
        str(result.get("metrics", {})),
        "```",
        "",
    ]
    # Plot file listing (paths only — embedding happens in the HTML report).
    pp = result.get("plot_paths") or {}
    if pp:
        lines += ["### Plots generated", ""]
        for name, path in sorted(pp.items()):
            lines.append(f"- `{name}` → `{path}`")
        lines.append("")
    # Optional SHAP narrative produced by the eval stage.
    expl = (result.get("eval") or {}).get("shap_explanation_text") or ""
    if expl.strip():
        lines += ["### SHAP (example row)", "", expl, ""]
    # 7. Recommendations.
    lines += ["## 7. Recommended next steps", ""]
    for s in _generate_next_steps(result):
        lines.append(f"- {s}")
    lines += ["", "*Generated by automl-engineer*", ""]
    return "\n".join(lines).strip() + "\n"
def _build_html(result: dict[str, Any]) -> str:
    """Self-contained HTML report with embedded PNG plots.

    All plots are inlined as base64 data URIs, so the resulting file has no
    external dependencies and can be emailed or shared as-is. Section
    numbering (3, 4, 5, 6) mirrors the markdown report; sections 1–2 have
    no HTML equivalent here.
    """
    title = html.escape("AutoML pipeline report")
    # Inline style constants; brand colors come from the module-level PALETTE.
    bg = "#ffffff"
    fg = "#111111"
    border = "#d4d4d4"
    # Document head, CSS, and the summary metadata table.
    parts: list[str] = [
        f"""<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8"><title>{title}</title>
<style>
body {{ font-family: Segoe UI, system-ui, sans-serif; background:{bg}; color:{fg}; padding: 28px; max-width: 960px; margin: 0 auto; }}
h1 {{ color: {PALETTE["primary"]}; }}
h2 {{ border-bottom: 2px solid {PALETTE["secondary"]}; padding-bottom: 6px; }}
table.meta {{ border-collapse: collapse; width: 100%; margin: 16px 0; }}
table.meta td, table.meta th {{ border: 1px solid {border}; padding: 8px 12px; text-align: left; }}
table.data {{ border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 14px; }}
table.data th, table.data td {{ border: 1px solid {border}; padding: 6px 10px; }}
img.plot {{ max-width: 100%; height: auto; display: block; margin: 16px 0; border: 1px solid {border}; border-radius: 6px; }}
.caption {{ font-size: 13px; color: #444; margin-bottom: 24px; }}
.note {{ background: #f5f5f5; border-left: 4px solid {PALETTE["primary"]}; padding: 12px 16px; margin: 20px 0; }}
</style></head><body>
<h1>{title}</h1>
<table class="meta">
<tr><th>Best model (tuned)</th><td>{html.escape(_best_model_tuned_line(result))}</td></tr>
<tr><th>Tuning improvement</th><td>{html.escape(_tuning_improvement_line(result))}</td></tr>
<tr><th>SHAP analysis</th><td>{html.escape(_shap_status(result))}</td></tr>
<tr><th>Total pipeline steps</th><td>{_count_pipeline_steps(result)}</td></tr>
<tr><th>Target</th><td>{html.escape(str(result.get("target_col", "—")))}</td></tr>
<tr><th>Task</th><td>{html.escape(str(result.get("task_type", "—")))}</td></tr>
</table>
""",
    ]
    # 3. Training Plan — dataset profile, model selection, metric, budget.
    if result.get("plan"):
        plan = result["plan"]
        dp = plan.get("dataset_profile") or {}
        parts.append("<h2>3. Training Plan</h2>")
        parts.append("<h3>Dataset profile</h3><table class='data'>")
        parts.append(
            f"<tr><th>Rows</th><td>{html.escape(str(dp.get('n_rows', '—')))}</td></tr>"
            f"<tr><th>Features</th><td>{html.escape(str(dp.get('n_features', '—')))}</td></tr>"
            f"<tr><th>Size category</th><td>{html.escape(_dataset_size_label(dp))}</td></tr>"
            f"<tr><th>Imbalance ratio</th><td>{html.escape(str(dp.get('imbalance_ratio', '—')))}</td></tr>"
            "</table>"
        )
        parts.append("<h3>Models</h3><table class='data'><tr><th>Model</th><th>Selected</th><th>Reason</th></tr>")
        reasons = plan.get("skip_reasons") or {}
        for m in plan.get("recommended_models") or []:
            parts.append(
                "<tr><td>"
                + html.escape(str(m))
                + "</td><td>Yes</td><td>Included in the training plan.</td></tr>"
            )
        for m in plan.get("skip_models") or []:
            r = reasons.get(m, "Excluded by training plan rules.")
            parts.append(
                f"<tr><td>{html.escape(str(m))}</td><td>No</td><td>{html.escape(str(r))}</td></tr>"
            )
        parts.append("</table>")
        parts.append(
            f"<h3>Primary metric</h3><p><strong>{html.escape(str(plan.get('primary_metric', '—')))}</strong> — "
            f"{html.escape(str(plan.get('metric_reasoning', '')))}</p>"
        )
        parts.append(
            f"<h3>Tuning budget</h3><p>Trials: {html.escape(str(plan.get('n_trials', '—')))}, "
            f"timeout (s): {html.escape(str(plan.get('timeout', '—')))}</p>"
        )
        for w in plan.get("warnings") or []:
            parts.append(f"<p class='caption'>⚠️ {html.escape(str(w))}</p>")
    # 4. Model comparison table (only when a comparison frame exists).
    cdf = result.get("comparison_df")
    if cdf is not None:
        parts.append("<h2>4. Model comparison</h2>")
        parts.append(
            "<p class='caption'>CV Mean, CV Std, CV Train Mean, and CV Overfit are from k-fold "
            "cross-validation on the training data when sample size allows.</p>"
        )
        try:
            # cdf is presumably a pandas DataFrame — fall back to a <pre>
            # dump if to_html is unavailable or fails.
            parts.append(cdf.to_html(index=False, classes="data", border=0, escape=True))
        except Exception:
            parts.append(f"<pre>{html.escape(str(cdf))}</pre>")
    trh = result.get("train") or {}
    # Same CV-rationale lookup as in _build_markdown: strip the tuned suffix,
    # fall back to the task's conventional primary metric.
    bnh = str(result.get("best_model_name", "—")).replace(" (tuned)", "").strip()
    pmh = trh.get("metric_name") or (
        "roc_auc" if result.get("task_type") == "classification" else "r2"
    )
    for rh in trh.get("results") or []:
        if rh.get("name") == bnh:
            cvm = rh.get("cv_mean")
            cvs = rh.get("cv_std")
            nf = trh.get("cv_folds_used") or 5
            if cvm is not None and cvs is not None:
                parts.append(
                    "<p><strong>"
                    + html.escape(bnh)
                    + "</strong> was selected using cross-validated performance: CV mean "
                    f"{float(cvm):.4f} ± {float(cvs):.4f} across {nf} folds "
                    f"(primary metric: <code>{html.escape(str(pmh))}</code>).</p>"
                )
            break
    # 5. Hyperparameter tuning summary (or the tuning error message).
    tune = result.get("tune")
    if tune is not None:
        parts.append("<h2>5. Hyperparameter Tuning</h2>")
        if tune.get("success"):
            parts.append(
                "<table class='data'><tr><th>Baseline</th><td>"
                f"{html.escape(str(tune.get('baseline_score')))}</td></tr>"
                f"<tr><th>After tuning</th><td>{html.escape(str(tune.get('best_score')))}</td></tr>"
                f"<tr><th>Improvement</th><td>{html.escape(str(tune.get('improvement')))}</td></tr>"
                f"<tr><th>Trials</th><td>{html.escape(str(tune.get('n_trials_run')))}</td></tr>"
                f"<tr><th>Time (s)</th><td>{html.escape(str(tune.get('tuning_time_s')))}</td></tr>"
                f"<tr><th>Overfit flag</th><td>{html.escape(str(tune.get('overfit')))}</td></tr></table>"
            )
            bp = tune.get("best_params") or {}
            if bp:
                parts.append("<h3>Best hyperparameters</h3><table class='data'><tr><th>Parameter</th><th>Value</th></tr>")
                # Cap at 48 rows so a huge search space cannot bloat the report.
                for k, v in sorted(bp.items())[:48]:
                    parts.append(
                        f"<tr><td>{html.escape(str(k))}</td><td>{html.escape(repr(v))}</td></tr>"
                    )
                parts.append("</table>")
        else:
            parts.append(f"<p>{html.escape(str(tune.get('error', 'Unknown error')))}</p>")
    # 6. Raw metrics as a <pre> block (Python repr, not JSON).
    parts.append("<h2>6. Evaluation metrics</h2>")
    parts.append(f"<pre>{html.escape(str(result.get('metrics', {})))}</pre>")
    # Figures: embed each known base plot as a base64 data URI; plots whose
    # files are missing/unreadable are skipped (see _embed_png).
    pp = result.get("plot_paths") or {}
    base_keys = [
        "confusion_matrix",
        "roc_curve",
        "actual_vs_predicted",
        "residuals",
        "feature_importance",
        "shap_bar",
        "shap_summary",
        "shap_waterfall",
    ]
    parts.append("<h2>Figures</h2>")
    embedded = 0
    for key in base_keys:
        path = pp.get(key)
        if not path:
            continue
        b64 = _embed_png(path)
        if not b64:
            continue
        parts.append(
            f"<h3>{html.escape(key.replace('_', ' ').title())}</h3>"
            f"<img class='plot' alt=\"{html.escape(key)}\" "
            f'src="data:image/png;base64,{b64}" />'
        )
        embedded += 1
    # SHAP per-feature dependence plots, each with an explanatory caption.
    dep_keys = sorted(k for k in pp if str(k).startswith("shap_dependence_"))
    if dep_keys:
        parts.append("<h2>SHAP Feature Deep Dive</h2>")
        parts.append(
            "<p>Dependence plots show how each top feature individually affects the model output on test samples. "
            "Color encodes the most interacting feature (when available).</p>"
        )
        for key in dep_keys:
            path = pp.get(key)
            if not path:
                continue
            b64 = _embed_png(path)
            if not b64:
                continue
            # Recover a human-readable feature name from the plot key.
            feat_part = str(key).replace("shap_dependence_", "", 1).replace("_", " ")
            parts.append(
                f"<h3>{html.escape(feat_part)}</h3>"
                f"<img class='plot' alt=\"{html.escape(key)}\" src=\"data:image/png;base64,{b64}\" />"
                "<p class='caption'>This plot shows how <strong>"
                f"{html.escape(feat_part)}</strong> affects the model prediction. "
                "Each dot is one test sample. The color shows the value of the most interacting feature.</p>"
            )
            embedded += 1
    parts.append(
        f"<p class='note'><strong>Plots embedded:</strong> {embedded}. "
        "The HTML report is fully self-contained — all plots are embedded. Safe to email or share.</p>"
    )
    # Recommendations and document close.
    parts.append("<h2>Recommended next steps</h2><ul>")
    for s in _generate_next_steps(result):
        parts.append(f"<li>{html.escape(s)}</li>")
    parts.append("</ul></body></html>")
    return "".join(parts)
def count_embedded_plots_html(html_str: str) -> int:
    """Count PNG plots embedded as base64 data URIs in a rendered HTML report."""
    marker = "data:image/png;base64,"
    return html_str.count(marker)