| """ |
| Report generator: rich markdown/HTML from pipeline results. |
| """ |
|
|
| from __future__ import annotations |
|
|
import base64
import html
import json
from datetime import datetime
from pathlib import Path
from typing import Any
|
|
| |
# Brand colors referenced by the HTML report's inline stylesheet
# ("primary" and "secondary" are used in _build_html; "accent" is kept
# for callers elsewhere — not referenced in this module).
PALETTE = {
    "primary": "#534AB7",
    "secondary": "#1D9E75",
    "accent": "#D85A30",
}
|
|
|
|
| def generate_report( |
| objective: str, |
| dataset_name: str, |
| metrics: dict, |
| best_model: str, |
| output_dir: str | Path = "outputs", |
| ) -> Path: |
| """ |
| Generate a markdown report with objective, dataset, metrics, and best model. |
| """ |
| output_dir = Path(output_dir) |
| output_dir.mkdir(parents=True, exist_ok=True) |
|
|
| timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
| report_path = output_dir / f"report_{timestamp}.md" |
|
|
| content = f"""# AutoML Report |
| |
| **Generated:** {datetime.now().isoformat()} |
| |
| ## Objective |
| {objective} |
| |
| ## Dataset |
| {dataset_name} |
| |
| ## Best Model |
| {best_model} |
| |
| ## Metrics |
| ```json |
| {metrics} |
| ``` |
| |
| --- |
| *Report generated by automl-engineer-agent* |
| """ |
| report_path.write_text(content, encoding="utf-8") |
| return report_path |
|
|
|
|
| def _dataset_size_label(dp: dict[str, Any]) -> str: |
| n = int(dp.get("n_rows", 0) or 0) |
| if n < 1000: |
| return "Small (< 1000 rows)" |
| if n <= 10000: |
| return "Medium" |
| return "Large (> 10000 rows)" |
|
|
|
|
| def _embed_png(path: str) -> str: |
| try: |
| p = Path(path) |
| if not p.exists(): |
| return "" |
| raw = p.read_bytes() |
| return base64.b64encode(raw).decode("ascii") |
| except Exception: |
| return "" |
|
|
|
|
| def _shap_status(result: dict[str, Any]) -> str: |
| pp = result.get("plot_paths") or {} |
| ev = result.get("eval") or {} |
| if ev.get("has_shap") or any( |
| k in pp for k in ("shap_bar", "shap_summary") |
| ) or any(str(k).startswith("shap_dependence_") for k in pp): |
| return "Complete" |
| return "Not available" |
|
|
|
|
| def _best_model_tuned_line(result: dict[str, Any]) -> str: |
| name = result.get("best_model_name") or "—" |
| tune = result.get("tune") or {} |
| if tune.get("success"): |
| return f"{name} (tuned)" |
| return str(name) |
|
|
|
|
| def _tuning_improvement_line(result: dict[str, Any]) -> str: |
| tune = result.get("tune") or {} |
| if not tune.get("success"): |
| return "Not run" |
| imp = tune.get("improvement") |
| if imp is None: |
| return "—" |
| return f"{float(imp):+.4f}" |
|
|
|
|
| def _count_pipeline_steps(result: dict[str, Any]) -> int: |
| n = 0 |
| if result.get("eda"): |
| n += 1 |
| if result.get("task"): |
| n += 1 |
| if result.get("prep"): |
| n += 1 |
| if result.get("plan"): |
| n += 1 |
| if result.get("train"): |
| n += 1 |
| if result.get("tune"): |
| n += 1 |
| if result.get("eval") or result.get("plot_paths"): |
| n += 1 |
| return n |
|
|
|
|
| def _generate_next_steps(result: dict[str, Any]) -> list[str]: |
| steps: list[str] = [] |
| eda = result.get("eda") or {} |
| prep = result.get("prep") or {} |
| ti = eda.get("target_info") or {} |
| miss = eda.get("missing", {}).get("by_column", {}) or {} |
| train_data = result.get("train", {}) or {} |
| overfit_warnings = list(train_data.get("overfitting_warnings", [])) |
|
|
| if eda.get("overview", {}).get("rows", 0) < 500: |
| steps.append("Collect more rows or use simpler models to reduce variance.") |
| if overfit_warnings: |
| steps.append("Reduce model complexity, add regularization, or gather more diverse data.") |
| if any(info.get("pct", 0) > 30 for info in miss.values()): |
| steps.append("Impute or drop high-missing columns before retraining.") |
| if ti.get("imbalance_ratio", 0) and ti.get("imbalance_ratio", 0) > 5: |
| steps.append("Try class_weight='balanced', SMOTE, or stratified sampling.") |
| if prep.get("target_leakage_suspicion"): |
| steps.append("Review columns flagged for possible target leakage.") |
|
|
| tune = result.get("tune") or {} |
| if tune.get("success"): |
| imp = float(tune.get("improvement") or 0.0) |
| if imp < 0.01: |
| steps.append( |
| "Hyperparameter tuning showed minimal improvement — consider feature engineering instead." |
| ) |
| if imp > 0.05: |
| steps.append( |
| "Significant improvement from tuning — consider expanding the search space with more trials." |
| ) |
| if tune.get("overfit"): |
| steps.append( |
| "Even after tuning the model overfit — try adding more training data or reducing model complexity." |
| ) |
|
|
| steps.append("Run k-fold cross-validation on the training split to validate stability.") |
| return steps[:12] |
|
|
|
|
def _build_plan_section_md(plan: dict[str, Any]) -> list[str]:
    """Render the markdown lines for the "Training Plan" report section."""
    profile = plan.get("dataset_profile") or {}
    out: list[str] = []
    out += [
        "## 3. Training Plan",
        "",
        "### Dataset profile",
        "",
        "| Field | Value |",
        "|-------|-------|",
        f"| Rows | {profile.get('n_rows', '—')} |",
        f"| Features | {profile.get('n_features', '—')} |",
        f"| Size category | {_dataset_size_label(profile)} |",
        f"| Imbalance ratio | {profile.get('imbalance_ratio', '—')} |",
        "",
        "### Models selected",
        "",
        "| Model | Selected | Reason |",
        "|-------|----------|--------|",
    ]
    # Recommended models first, then skipped ones with their reasons.
    skip_reasons = plan.get("skip_reasons") or {}
    out.extend(
        f"| {model} | Yes | Included in the training plan for this dataset. |"
        for model in plan.get("recommended_models") or []
    )
    out.extend(
        f"| {model} | No | {skip_reasons.get(model, 'Excluded by training plan rules.')} |"
        for model in plan.get("skip_models") or []
    )
    out += [
        "",
        "### Primary metric",
        "",
        f"- **Metric:** `{plan.get('primary_metric', '—')}`",
        f"- **Reasoning:** {plan.get('metric_reasoning', '—')}",
        "",
        "### Tuning budget",
        "",
        f"- **Optuna trials:** {plan.get('n_trials', '—')}",
        f"- **Timeout (s):** {plan.get('timeout', '—')}",
        "",
    ]
    # Warnings sub-section only appears when the plan carries warnings.
    plan_warnings = plan.get("warnings") or []
    if plan_warnings:
        out += ["### Warnings", ""]
        out += [f"- ⚠️ {w}" for w in plan_warnings]
        out.append("")
    return out
|
|
|
|
| def _build_tune_section_md(tune: dict[str, Any]) -> list[str]: |
| if not tune.get("success"): |
| return [ |
| "## 5. Hyperparameter Tuning", |
| "", |
| f"Tuning did not complete successfully: {tune.get('error', 'unknown')}", |
| "", |
| ] |
| bp = tune.get("best_params") or {} |
| lines = [ |
| "## 5. Hyperparameter Tuning", |
| "", |
| "| Metric | Value |", |
| "|--------|-------|", |
| f"| Baseline score (test) | {tune.get('baseline_score', '—')} |", |
| f"| Best score after tuning | {tune.get('best_score', '—')} |", |
| f"| Improvement | {tune.get('improvement', '—')} |", |
| f"| Trials run | {tune.get('n_trials_run', '—')} |", |
| f"| Tuning time (s) | {tune.get('tuning_time_s', '—')} |", |
| f"| Overfit (train–test gap heuristic) | {tune.get('overfit', '—')} |", |
| "", |
| "### Best hyperparameters", |
| "", |
| "| Parameter | Value |", |
| "|-----------|-------|", |
| ] |
| for k, v in sorted(bp.items())[:48]: |
| lines.append(f"| `{k}` | `{v!r}` |") |
| lines.append("") |
| return lines |
|
|
|
|
def _build_markdown(result: dict[str, Any]) -> str:
    """Full markdown report from a pipeline result dict.

    Builds the report as a list of lines and joins them at the end.
    Sections without data render a placeholder instead of being dropped,
    so section numbering stays stable across runs.
    """
    # Summary table at the top, then the start of the overview section.
    lines: list[str] = [
        "# AutoML pipeline report",
        "",
        "| Field | Value |",
        "|-------|-------|",
        f"| **Best model (tuned)** | {_best_model_tuned_line(result)} |",
        f"| **Tuning improvement** | {_tuning_improvement_line(result)} |",
        f"| **SHAP analysis** | {_shap_status(result)} |",
        f"| **Total pipeline steps** | {_count_pipeline_steps(result)} |",
        f"| **Target** | {result.get('target_col', '—')} |",
        f"| **Task** | {result.get('task_type', '—')} |",
        "",
        "## 1. Overview",
        "",
        f"**Best model:** {result.get('best_model_name', '—')}",
    ]
    tr_ov = result.get("train") or {}
    # Strip any " (tuned)" suffix so the name matches the raw entries in
    # the training results list.
    _bn = str(result.get("best_model_name", "—")).replace(" (tuned)", "").strip()
    # Primary metric: prefer what training recorded, else a task default.
    _pm = tr_ov.get("metric_name") or (
        "roc_auc" if result.get("task_type") == "classification" else "r2"
    )
    # Add a cross-validation blurb for the best model when CV stats exist.
    for _r in tr_ov.get("results") or []:
        if _r.get("name") == _bn:
            _cvm = _r.get("cv_mean")
            _cvs = _r.get("cv_std")
            _nf = tr_ov.get("cv_folds_used") or 5
            if _cvm is not None and _cvs is not None:
                lines.append(
                    f"\n**{_bn}** was selected using cross-validated performance: "
                    f"CV mean {float(_cvm):.4f} ± {float(_cvs):.4f} across {_nf} folds. "
                    f"That average reflects how the model scores when validated on different held-out "
                    f"subsets of the training data (primary metric: `{_pm}`)."
                )
            break
    lines.append("")

    # 2. Preprocessing — only when the prep stage produced data.
    prep = result.get("prep") or {}
    if prep:
        lines += [
            "## 2. Preprocessing",
            "",
            f"- **Final feature count:** {prep.get('final_feature_count', '—')}",
            f"- **Train / test size:** {prep.get('train_size', '—')} / {prep.get('test_size', '—')}",
            "",
        ]

    # 3. Training plan — delegated; placeholder when the stage is absent.
    if result.get("plan"):
        lines += _build_plan_section_md(result["plan"])
    else:
        lines += ["## 3. Training Plan", "", "*Not available for this run.*", ""]

    # 4. Model comparison table (comparison_df is presumably a pandas
    # DataFrame — it only needs a to_markdown method; verify with callers).
    cdf = result.get("comparison_df")
    lines += [
        "## 4. Model comparison",
        "",
        "The table includes **CV Mean**, **CV Std**, **CV Train Mean**, and **CV Overfit** when "
        "cross-validation ran; otherwise those cells are empty.",
        "",
    ]
    if cdf is not None:
        try:
            lines.append(cdf.to_markdown(index=False))
        except Exception:
            # Best effort: fall back to the plain string rendering if
            # to_markdown fails (e.g. optional `tabulate` not installed).
            lines.append(str(cdf))
    else:
        lines.append("*No comparison table.*")
    lines.append("")

    # 5. Tuning — `is not None` distinguishes "attempted (possibly
    # failed)" from "never run".
    if result.get("tune") is not None:
        lines += _build_tune_section_md(result["tune"])
    else:
        lines += ["## 5. Hyperparameter Tuning", "", "*Not run.*", ""]

    # 6. Raw evaluation metrics inside a plain code fence.
    lines += [
        "## 6. Evaluation metrics",
        "",
        "```",
        str(result.get("metrics", {})),
        "```",
        "",
    ]

    # Plots are listed by path here; the HTML report embeds them inline.
    pp = result.get("plot_paths") or {}
    if pp:
        lines += ["### Plots generated", ""]
        for name, path in sorted(pp.items()):
            lines.append(f"- `{name}` → `{path}`")
        lines.append("")

    # Optional SHAP narrative text produced by the evaluation stage.
    expl = (result.get("eval") or {}).get("shap_explanation_text") or ""
    if expl.strip():
        lines += ["### SHAP (example row)", "", expl, ""]

    # 7. Heuristic recommendations derived from the result dict.
    lines += ["## 7. Recommended next steps", ""]
    for s in _generate_next_steps(result):
        lines.append(f"- {s}")
    lines += ["", "*Generated by automl-engineer*", ""]
    return "\n".join(lines).strip() + "\n"
|
|
|
|
def _build_html(result: dict[str, Any]) -> str:
    """Self-contained HTML report with embedded PNG plots.

    All figures are inlined as base64 data URIs, so the output is a
    single file with no external assets. All dynamic text is passed
    through html.escape before interpolation.
    """
    title = html.escape("AutoML pipeline report")
    # Base colors for the embedded stylesheet (PALETTE supplies accents).
    bg = "#ffffff"
    fg = "#111111"
    border = "#d4d4d4"
    # Document head, inline stylesheet, and the summary table.
    parts: list[str] = [
        f"""<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8"><title>{title}</title>
<style>
body {{ font-family: Segoe UI, system-ui, sans-serif; background:{bg}; color:{fg}; padding: 28px; max-width: 960px; margin: 0 auto; }}
h1 {{ color: {PALETTE["primary"]}; }}
h2 {{ border-bottom: 2px solid {PALETTE["secondary"]}; padding-bottom: 6px; }}
table.meta {{ border-collapse: collapse; width: 100%; margin: 16px 0; }}
table.meta td, table.meta th {{ border: 1px solid {border}; padding: 8px 12px; text-align: left; }}
table.data {{ border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 14px; }}
table.data th, table.data td {{ border: 1px solid {border}; padding: 6px 10px; }}
img.plot {{ max-width: 100%; height: auto; display: block; margin: 16px 0; border: 1px solid {border}; border-radius: 6px; }}
.caption {{ font-size: 13px; color: #444; margin-bottom: 24px; }}
.note {{ background: #f5f5f5; border-left: 4px solid {PALETTE["primary"]}; padding: 12px 16px; margin: 20px 0; }}
</style></head><body>
<h1>{title}</h1>
<table class="meta">
<tr><th>Best model (tuned)</th><td>{html.escape(_best_model_tuned_line(result))}</td></tr>
<tr><th>Tuning improvement</th><td>{html.escape(_tuning_improvement_line(result))}</td></tr>
<tr><th>SHAP analysis</th><td>{html.escape(_shap_status(result))}</td></tr>
<tr><th>Total pipeline steps</th><td>{_count_pipeline_steps(result)}</td></tr>
<tr><th>Target</th><td>{html.escape(str(result.get("target_col", "—")))}</td></tr>
<tr><th>Task</th><td>{html.escape(str(result.get("task_type", "—")))}</td></tr>
</table>
""",
    ]

    # Section 3: training plan (only rendered when the planning stage ran;
    # numbering mirrors the markdown report).
    if result.get("plan"):
        plan = result["plan"]
        dp = plan.get("dataset_profile") or {}
        parts.append("<h2>3. Training Plan</h2>")
        parts.append("<h3>Dataset profile</h3><table class='data'>")
        parts.append(
            f"<tr><th>Rows</th><td>{html.escape(str(dp.get('n_rows', '—')))}</td></tr>"
            f"<tr><th>Features</th><td>{html.escape(str(dp.get('n_features', '—')))}</td></tr>"
            f"<tr><th>Size category</th><td>{html.escape(_dataset_size_label(dp))}</td></tr>"
            f"<tr><th>Imbalance ratio</th><td>{html.escape(str(dp.get('imbalance_ratio', '—')))}</td></tr>"
            "</table>"
        )
        # Recommended models first, then skipped models with reasons.
        parts.append("<h3>Models</h3><table class='data'><tr><th>Model</th><th>Selected</th><th>Reason</th></tr>")
        reasons = plan.get("skip_reasons") or {}
        for m in plan.get("recommended_models") or []:
            parts.append(
                "<tr><td>"
                + html.escape(str(m))
                + "</td><td>Yes</td><td>Included in the training plan.</td></tr>"
            )
        for m in plan.get("skip_models") or []:
            r = reasons.get(m, "Excluded by training plan rules.")
            parts.append(
                f"<tr><td>{html.escape(str(m))}</td><td>No</td><td>{html.escape(str(r))}</td></tr>"
            )
        parts.append("</table>")
        parts.append(
            f"<h3>Primary metric</h3><p><strong>{html.escape(str(plan.get('primary_metric', '—')))}</strong> — "
            f"{html.escape(str(plan.get('metric_reasoning', '')))}</p>"
        )
        parts.append(
            f"<h3>Tuning budget</h3><p>Trials: {html.escape(str(plan.get('n_trials', '—')))}, "
            f"timeout (s): {html.escape(str(plan.get('timeout', '—')))}</p>"
        )
        for w in plan.get("warnings") or []:
            parts.append(f"<p class='caption'>⚠️ {html.escape(str(w))}</p>")

    # Section 4: model comparison (comparison_df is presumably a pandas
    # DataFrame — only a to_html method is required; fall back to <pre>).
    cdf = result.get("comparison_df")
    if cdf is not None:
        parts.append("<h2>4. Model comparison</h2>")
        parts.append(
            "<p class='caption'>CV Mean, CV Std, CV Train Mean, and CV Overfit are from k-fold "
            "cross-validation on the training data when sample size allows.</p>"
        )
        try:
            parts.append(cdf.to_html(index=False, classes="data", border=0, escape=True))
        except Exception:
            parts.append(f"<pre>{html.escape(str(cdf))}</pre>")
        trh = result.get("train") or {}
        # Same CV blurb as the markdown report: match the best model by its
        # raw (un-suffixed) name in the training results.
        bnh = str(result.get("best_model_name", "—")).replace(" (tuned)", "").strip()
        pmh = trh.get("metric_name") or (
            "roc_auc" if result.get("task_type") == "classification" else "r2"
        )
        for rh in trh.get("results") or []:
            if rh.get("name") == bnh:
                cvm = rh.get("cv_mean")
                cvs = rh.get("cv_std")
                nf = trh.get("cv_folds_used") or 5
                if cvm is not None and cvs is not None:
                    parts.append(
                        "<p><strong>"
                        + html.escape(bnh)
                        + "</strong> was selected using cross-validated performance: CV mean "
                        f"{float(cvm):.4f} ± {float(cvs):.4f} across {nf} folds "
                        f"(primary metric: <code>{html.escape(str(pmh))}</code>).</p>"
                    )
                break

    # Section 5: tuning. `is not None` distinguishes "attempted (possibly
    # failed)" from "never run"; failures render the error message.
    tune = result.get("tune")
    if tune is not None:
        parts.append("<h2>5. Hyperparameter Tuning</h2>")
        if tune.get("success"):
            parts.append(
                "<table class='data'><tr><th>Baseline</th><td>"
                f"{html.escape(str(tune.get('baseline_score')))}</td></tr>"
                f"<tr><th>After tuning</th><td>{html.escape(str(tune.get('best_score')))}</td></tr>"
                f"<tr><th>Improvement</th><td>{html.escape(str(tune.get('improvement')))}</td></tr>"
                f"<tr><th>Trials</th><td>{html.escape(str(tune.get('n_trials_run')))}</td></tr>"
                f"<tr><th>Time (s)</th><td>{html.escape(str(tune.get('tuning_time_s')))}</td></tr>"
                f"<tr><th>Overfit flag</th><td>{html.escape(str(tune.get('overfit')))}</td></tr></table>"
            )
            bp = tune.get("best_params") or {}
            if bp:
                # Sorted for stable output; capped at 48 rows.
                parts.append("<h3>Best hyperparameters</h3><table class='data'><tr><th>Parameter</th><th>Value</th></tr>")
                for k, v in sorted(bp.items())[:48]:
                    parts.append(
                        f"<tr><td>{html.escape(str(k))}</td><td>{html.escape(repr(v))}</td></tr>"
                    )
                parts.append("</table>")
        else:
            parts.append(f"<p>{html.escape(str(tune.get('error', 'Unknown error')))}</p>")

    # Section 6: raw evaluation metrics as preformatted text.
    parts.append("<h2>6. Evaluation metrics</h2>")
    parts.append(f"<pre>{html.escape(str(result.get('metrics', {})))}</pre>")

    # Figures: embed the standard plots in a fixed order; unknown keys and
    # unreadable files are silently skipped (_embed_png returns "").
    pp = result.get("plot_paths") or {}
    base_keys = [
        "confusion_matrix",
        "roc_curve",
        "actual_vs_predicted",
        "residuals",
        "feature_importance",
        "shap_bar",
        "shap_summary",
        "shap_waterfall",
    ]
    parts.append("<h2>Figures</h2>")
    embedded = 0
    for key in base_keys:
        path = pp.get(key)
        if not path:
            continue
        b64 = _embed_png(path)
        if not b64:
            continue
        parts.append(
            f"<h3>{html.escape(key.replace('_', ' ').title())}</h3>"
            f"<img class='plot' alt=\"{html.escape(key)}\" "
            f'src="data:image/png;base64,{b64}" />'
        )
        embedded += 1

    # SHAP dependence plots: one per top feature, keyed by naming
    # convention "shap_dependence_<feature>"; sorted for stable order.
    dep_keys = sorted(k for k in pp if str(k).startswith("shap_dependence_"))
    if dep_keys:
        parts.append("<h2>SHAP Feature Deep Dive</h2>")
        parts.append(
            "<p>Dependence plots show how each top feature individually affects the model output on test samples. "
            "Color encodes the most interacting feature (when available).</p>"
        )
        for key in dep_keys:
            path = pp.get(key)
            if not path:
                continue
            b64 = _embed_png(path)
            if not b64:
                continue
            # Recover a human-readable feature name from the plot key.
            feat_part = str(key).replace("shap_dependence_", "", 1).replace("_", " ")
            parts.append(
                f"<h3>{html.escape(feat_part)}</h3>"
                f"<img class='plot' alt=\"{html.escape(key)}\" src=\"data:image/png;base64,{b64}\" />"
                "<p class='caption'>This plot shows how <strong>"
                f"{html.escape(feat_part)}</strong> affects the model prediction. "
                "Each dot is one test sample. The color shows the value of the most interacting feature.</p>"
            )
            embedded += 1

    parts.append(
        f"<p class='note'><strong>Plots embedded:</strong> {embedded}. "
        "The HTML report is fully self-contained — all plots are embedded. Safe to email or share.</p>"
    )

    # Closing recommendations list mirrors the markdown report.
    parts.append("<h2>Recommended next steps</h2><ul>")
    for s in _generate_next_steps(result):
        parts.append(f"<li>{html.escape(s)}</li>")
    parts.append("</ul></body></html>")
    return "".join(parts)
|
|
|
|
def count_embedded_plots_html(html_str: str) -> int:
    """Count the base64-embedded PNG images in a rendered HTML report."""
    marker = "data:image/png;base64,"
    return html_str.count(marker)
|
|