Spaces:
Running
Running
Miyu Horiuchi
v2 results: ESM-2 t6 (8M, 20-protein sample) loses to v1 hand-crafted features on all 4 phenotype targets
8800528 | """Side-by-side comparison: v1 hand-crafted features vs v2 ESM-2 embeddings. | |
| Reads: | |
| artifacts/baseline_results.json (v1) | |
| artifacts/embedding_results.json (v2) | |
| Writes: | |
| artifacts/v1_vs_v2_comparison.md | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from microbe_model import config | |
# Phenotype targets reported in the comparison table, in row order.
_TARGETS = (
    "optimal_temperature_c",
    "optimal_ph",
    "oxygen_requirement",
    "salt_tolerance_pct",
)

# Floor applied to the v1 (baseline) metric when computing the relative
# change, so a baseline of exactly 0 cannot raise ZeroDivisionError.
_MIN_BASELINE = 0.001


def _comparison_row(target, v1_entry, v2_entry):
    """Return one markdown table row comparing v1 and v2 on *target*.

    Each entry is the per-target dict from a results file; only its
    ``"mean_metric"`` and ``"task"`` keys are read. When either side has no
    ``"mean_metric"`` the row is filled with em-dash placeholders.

    A task of ``"regression"`` is reported as MAE (lower is better); any
    other task value, including a missing one, is reported as F1 (higher
    is better).
    """
    v1_metric = v1_entry.get("mean_metric")
    v2_metric = v2_entry.get("mean_metric")
    if v1_metric is None or v2_metric is None:
        return f"| `{target}` | — | — | — |"

    task = v1_entry.get("task") or v2_entry.get("task", "?")
    delta = v2_metric - v1_metric
    if task == "regression":
        label, v2_wins, v1_wins = "MAE", delta < 0, delta > 0
    else:
        label, v2_wins, v1_wins = "F1", delta > 0, delta < 0
    marker = "🟢" if v2_wins else ("🔴" if v1_wins else "⚪")

    # Both task types share the floored denominator; previously only the
    # classification branch was protected against a zero baseline.
    pct = delta / max(_MIN_BASELINE, v1_metric) * 100
    return (
        f"| `{target}` | {label} {v1_metric:.3f} | {label} {v2_metric:.3f} | "
        f"{marker} {delta:+.3f} ({pct:+.1f}%) |"
    )


def main(artifacts_dir: "str | os.PathLike[str] | None" = None) -> None:
    """Write and print a markdown comparison of v1 and v2 results.

    Reads ``baseline_results.json`` (v1) and ``embedding_results.json`` (v2)
    from the artifacts directory, drops their ``"__meta__"`` entries, builds
    one table row per phenotype target, writes the report to
    ``v1_vs_v2_comparison.md`` in the same directory and echoes it to stdout.

    Args:
        artifacts_dir: Directory holding the result files. Defaults to
            ``config.ARTIFACTS`` when omitted.

    Raises:
        SystemExit: If either input file does not exist.
    """
    # Local import so this function does not depend on edits to the
    # module's import block.
    from pathlib import Path

    artifacts = config.ARTIFACTS if artifacts_dir is None else Path(artifacts_dir)
    v1_path = artifacts / "baseline_results.json"
    v2_path = artifacts / "embedding_results.json"
    if not v1_path.exists() or not v2_path.exists():
        raise SystemExit(f"Need both {v1_path} and {v2_path}")

    # Explicit UTF-8: the locale default is not guaranteed to be UTF-8.
    v1 = json.loads(v1_path.read_text(encoding="utf-8"))
    v2 = json.loads(v2_path.read_text(encoding="utf-8"))
    v1.pop("__meta__", None)
    v2.pop("__meta__", None)

    lines = [
        "# v1 (hand-crafted features) vs v2 (ESM-2 embeddings)",
        "",
        "Same train/test splits, same XGBoost hyperparameters. Only difference: input features.",
        "",
        "| Target | v1 (n features) | v2 (embedding dim) | Δ |",
        "|---|---|---|---|",
    ]
    lines.extend(
        _comparison_row(target, v1.get(target, {}), v2.get(target, {}))
        for target in _TARGETS
    )
    lines.extend([
        "",
        "## Reading this table",
        "",
        "- 🟢 = embeddings beat hand-crafted features",
        "- 🔴 = hand-crafted features beat embeddings",
        "- ⚪ = no difference",
        "",
        "Regression: lower MAE is better, so a *negative* delta is good. ",
        "Classification: higher F1 is better, so a *positive* delta is good.",
        "",
        "## Interpretation",
        "",
        "- **≥ 10% lift on T_opt:** validates the genome-LM direction. Worth investing",
        " in larger models (ESM-2 t33_650M or Nucleotide Transformer / Evo-1).",
        "- **pH or salt go from broken (≤5%) to working (≥15%):** embeddings recover",
        " signal that hand-crafted features couldn't capture. Big win for the thesis.",
        "- **No meaningful lift anywhere:** the bottleneck is not feature engineering. ",
        " Need new data sources (failed cultivation logs, environmental metadata).",
    ])

    out_path = artifacts / "v1_vs_v2_comparison.md"
    # The report contains emoji and math symbols, which a non-UTF-8 locale
    # encoding (e.g. cp1252) cannot represent.
    out_path.write_text("\n".join(lines), encoding="utf-8")
    print(f"Wrote {out_path}")
    for line in lines:
        print(line)
# Script entry point: generate the comparison report only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()