Spaces:
Running
Running
File size: 3,283 Bytes
"""Side-by-side comparison: v1 hand-crafted features vs v2 ESM-2 embeddings.

Reads:
    artifacts/baseline_results.json   (v1)
    artifacts/embedding_results.json  (v2)

Writes:
    artifacts/v1_vs_v2_comparison.md
"""
from __future__ import annotations

import json
from pathlib import Path

from microbe_model import config
def main() -> None:
v1_path = config.ARTIFACTS / "baseline_results.json"
v2_path = config.ARTIFACTS / "embedding_results.json"
if not v1_path.exists() or not v2_path.exists():
raise SystemExit(f"Need both {v1_path} and {v2_path}")
v1 = json.loads(v1_path.read_text())
v2 = json.loads(v2_path.read_text())
v1.pop("__meta__", None)
v2.pop("__meta__", None)
lines = [
"# v1 (hand-crafted features) vs v2 (ESM-2 embeddings)",
"",
"Same train/test splits, same XGBoost hyperparameters. Only difference: input features.",
"",
"| Target | v1 (n features) | v2 (embedding dim) | Ξ |",
"|---|---|---|---|",
]
for target in ("optimal_temperature_c", "optimal_ph", "oxygen_requirement", "salt_tolerance_pct"):
v1_metric = v1.get(target, {}).get("mean_metric")
v2_metric = v2.get(target, {}).get("mean_metric")
task = v1.get(target, {}).get("task") or v2.get(target, {}).get("task", "?")
if v1_metric is None or v2_metric is None:
lines.append(f"| `{target}` | β | β | β |")
continue
if task == "regression":
delta = v2_metric - v1_metric
arrow = "π’" if delta < 0 else ("π΄" if delta > 0 else "βͺ")
lines.append(f"| `{target}` | MAE {v1_metric:.3f} | MAE {v2_metric:.3f} | "
f"{arrow} {delta:+.3f} ({delta / v1_metric * 100:+.1f}%) |")
else:
delta = v2_metric - v1_metric
arrow = "π’" if delta > 0 else ("π΄" if delta < 0 else "βͺ")
lines.append(f"| `{target}` | F1 {v1_metric:.3f} | F1 {v2_metric:.3f} | "
f"{arrow} {delta:+.3f} ({delta / max(0.001, v1_metric) * 100:+.1f}%) |")
lines.extend([
"",
"## Reading this table",
"",
"- π’ = embeddings beat hand-crafted features",
"- π΄ = hand-crafted features beat embeddings",
"- βͺ = no difference",
"",
"Regression: lower MAE is better, so a *negative* delta is good. ",
"Classification: higher F1 is better, so a *positive* delta is good.",
"",
"## Interpretation",
"",
"- **β₯ 10% lift on T_opt:** validates the genome-LM direction. Worth investing",
" in larger models (ESM-2 t33_650M or Nucleotide Transformer / Evo-1).",
"- **pH or salt go from broken (β€5%) to working (β₯15%):** embeddings recover",
" signal that hand-crafted features couldn't capture. Big win for the thesis.",
"- **No meaningful lift anywhere:** the bottleneck is not feature engineering. ",
" Need new data sources (failed cultivation logs, environmental metadata).",
])
out_path = config.ARTIFACTS / "v1_vs_v2_comparison.md"
out_path.write_text("\n".join(lines))
print(f"Wrote {out_path}")
for line in lines:
print(line)
# Generate the report only when this file is executed as a script;
# importing the module must not trigger any file I/O.
if __name__ == "__main__":
    main()
|