"""Side-by-side comparison: v1 hand-crafted features vs v2 ESM-2 embeddings. Reads: artifacts/baseline_results.json (v1) artifacts/embedding_results.json (v2) Writes: artifacts/v1_vs_v2_comparison.md """ from __future__ import annotations import json from microbe_model import config def main() -> None: v1_path = config.ARTIFACTS / "baseline_results.json" v2_path = config.ARTIFACTS / "embedding_results.json" if not v1_path.exists() or not v2_path.exists(): raise SystemExit(f"Need both {v1_path} and {v2_path}") v1 = json.loads(v1_path.read_text()) v2 = json.loads(v2_path.read_text()) v1.pop("__meta__", None) v2.pop("__meta__", None) lines = [ "# v1 (hand-crafted features) vs v2 (ESM-2 embeddings)", "", "Same train/test splits, same XGBoost hyperparameters. Only difference: input features.", "", "| Target | v1 (n features) | v2 (embedding dim) | Δ |", "|---|---|---|---|", ] for target in ("optimal_temperature_c", "optimal_ph", "oxygen_requirement", "salt_tolerance_pct"): v1_metric = v1.get(target, {}).get("mean_metric") v2_metric = v2.get(target, {}).get("mean_metric") task = v1.get(target, {}).get("task") or v2.get(target, {}).get("task", "?") if v1_metric is None or v2_metric is None: lines.append(f"| `{target}` | — | — | — |") continue if task == "regression": delta = v2_metric - v1_metric arrow = "🟢" if delta < 0 else ("🔴" if delta > 0 else "⚪") lines.append(f"| `{target}` | MAE {v1_metric:.3f} | MAE {v2_metric:.3f} | " f"{arrow} {delta:+.3f} ({delta / v1_metric * 100:+.1f}%) |") else: delta = v2_metric - v1_metric arrow = "🟢" if delta > 0 else ("🔴" if delta < 0 else "⚪") lines.append(f"| `{target}` | F1 {v1_metric:.3f} | F1 {v2_metric:.3f} | " f"{arrow} {delta:+.3f} ({delta / max(0.001, v1_metric) * 100:+.1f}%) |") lines.extend([ "", "## Reading this table", "", "- 🟢 = embeddings beat hand-crafted features", "- 🔴 = hand-crafted features beat embeddings", "- ⚪ = no difference", "", "Regression: lower MAE is better, so a *negative* delta is good. ", "Classification: higher F1 is better, so a *positive* delta is good.", "", "## Interpretation", "", "- **≥ 10% lift on T_opt:** validates the genome-LM direction. Worth investing", " in larger models (ESM-2 t33_650M or Nucleotide Transformer / Evo-1).", "- **pH or salt go from broken (≤5%) to working (≥15%):** embeddings recover", " signal that hand-crafted features couldn't capture. Big win for the thesis.", "- **No meaningful lift anywhere:** the bottleneck is not feature engineering. ", " Need new data sources (failed cultivation logs, environmental metadata).", ]) out_path = config.ARTIFACTS / "v1_vs_v2_comparison.md" out_path.write_text("\n".join(lines)) print(f"Wrote {out_path}") for line in lines: print(line) if __name__ == "__main__": main()