openmed-autoresearch / analyze.py
AutoResearch Agent
initial commit
ecbf3a0
"""
analyze.py — Parse results.tsv and generate transfer affinity analysis.
Usage: python analyze.py results.tsv
Produces:
- transfer_summary.txt: plain-text report of the findings (also printed to stdout)

Possible extension: generate matplotlib heatmaps from the same data.
"""
import sys
import csv
import re
from collections import defaultdict
# Dataset identifiers this script knows about. They are searched for (as
# lowercase substrings) in experiment descriptions and also fix the row order
# of the transfer-affinity table.
DATASETS = [
    "bc5cdr_chem",
    "ncbi_disease",
    "bc2gm",
    "jnlpba",
    "linnaeus",
]
def _parse_float(raw, default=0.0):
    """Return *raw* as a float, or *default* if it is missing, blank, or not numeric."""
    if raw is None:
        return default
    try:
        return float(raw)
    except (TypeError, ValueError):
        return default


def _parse_int(raw, default=0):
    """Return *raw* as an int (accepting values like "10240.5"), else *default*."""
    if raw is None:
        return default
    try:
        return int(raw)
    except (TypeError, ValueError):
        pass
    try:
        # Fall back to float parsing so decimal values are truncated instead
        # of aborting the whole report.
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return default


def parse_results(filepath):
    """Parse results.tsv into structured experiment records.

    The file is read as tab-separated text with a header row. The columns
    used are ``experiment``, ``description``, ``val_f1``, ``peak_vram_mb``
    and ``kept``.

    Args:
        filepath: Path to the TSV file.

    Returns:
        A list of dicts, one per data row, with keys ``id`` (str), ``desc``
        (str), ``f1`` (float), ``vram`` (int) and ``kept`` (bool, true when
        the ``kept`` cell is "yes" in any letter case).

    Missing, blank, or unparseable numeric cells become 0 and missing text
    cells become "" so that one incomplete row does not abort the analysis.

    Raises:
        OSError: If the file cannot be opened.
    """
    experiments = []
    # newline="" is what the csv module expects; utf-8-sig reads plain UTF-8
    # and also drops a leading BOM so the first header name stays intact.
    with open(filepath, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f, delimiter="\t")
        for row in reader:
            # DictReader fills cells missing from a short row with None,
            # hence the "or ''" fallbacks below.
            experiments.append({
                "id": row.get("experiment") or "",
                "desc": row.get("description") or "",
                "f1": _parse_float(row.get("val_f1")),
                "vram": _parse_int(row.get("peak_vram_mb")),
                "kept": (row.get("kept") or "").strip().lower() == "yes",
            })
    return experiments
def extract_transfer_pairs(experiments, target="ncbi_disease", datasets=None):
    """Try to extract source→target pairs from experiment descriptions.

    Each experiment description is lowercased and scanned for known dataset
    names. Every dataset other than *target* whose name occurs in the
    description is counted as a transfer source for that experiment.

    Args:
        experiments: Iterable of records with at least ``desc`` (str) and
            ``f1`` (float) keys, as produced by ``parse_results``.
        target: Name of the target dataset; it is never reported as a source.
        datasets: Dataset names to look for. Defaults to the module-level
            ``DATASETS`` list.

    Returns:
        A ``(pairs, baseline_f1)`` tuple. ``pairs`` is a ``defaultdict``
        mapping ``(source, target)`` to the list of F1 scores of the matching
        experiments, in input order. ``baseline_f1`` is the F1 of the last
        experiment whose description contains "baseline" or "no curriculum",
        or ``None`` if there is no such experiment.
    """
    if datasets is None:
        datasets = DATASETS
    candidate_sources = [name for name in datasets if name != target]

    pairs = defaultdict(list)  # (source, target) -> [f1 scores]
    baseline_f1 = None
    for exp in experiments:
        desc = exp["desc"].lower()

        # Baseline rows only set the reference score; they are never
        # attributed to a source dataset.
        if "baseline" in desc or "no curriculum" in desc:
            baseline_f1 = exp["f1"]
            continue

        for src in candidate_sources:
            if src.lower() in desc:
                pairs[(src, target)].append(exp["f1"])
    return pairs, baseline_f1
def main():
    """Build the transfer-affinity report for the TSV named on the command line.

    Reads ``sys.argv[1]`` with ``parse_results``, groups scores with
    ``extract_transfer_pairs``, prints the report to stdout and writes the
    same text to ``transfer_summary.txt`` in the current directory.

    Exits with status 1 if no path argument is given or the file contains no
    experiments.
    """
    if len(sys.argv) < 2:
        print("Usage: python analyze.py results.tsv")
        sys.exit(1)

    experiments = parse_results(sys.argv[1])
    if not experiments:
        print("No experiments found in results.tsv")
        sys.exit(1)

    pairs, baseline_f1 = extract_transfer_pairs(experiments)
    # Compare against None explicitly: a baseline F1 of 0.0 is a valid value
    # and must not be reported as "not found".
    has_baseline = baseline_f1 is not None

    report = [
        "=" * 60,
        "OPENMED CROSS-DATASET TRANSFER AFFINITY REPORT",
        "=" * 60,
        f"\nTotal experiments: {len(experiments)}",
        f"Kept (improved): {sum(1 for e in experiments if e['kept'])}",
        f"Baseline F1: {baseline_f1:.4f}" if has_baseline else "Baseline: not found",
    ]

    if has_baseline and pairs:
        report.append("\n" + "-" * 60)
        report.append("TRANSFER AFFINITY: Source → NCBI Disease NER")
        report.append("-" * 60)
        report.append(
            f"{'Source Dataset':<20} {'Best F1':>10} {'Avg F1':>10} "
            f"{'ΔF1 vs Base':>12} {'N':>5}"
        )
        report.append("-" * 60)
        for src in DATASETS:
            if src == "ncbi_disease":
                continue
            key = (src, "ncbi_disease")
            if key in pairs:
                scores = pairs[key]
                best = max(scores)
                avg = sum(scores) / len(scores)
                delta = best - baseline_f1
                # The "+" format flag keeps the sign attached to the number
                # inside the 12-character column.
                report.append(
                    f"{src:<20} {best:>10.4f} {avg:>10.4f} "
                    f"{delta:>+12.4f} {len(scores):>5}"
                )
            else:
                report.append(
                    f"{src:<20} {'N/A':>10} {'N/A':>10} {'N/A':>12} {'0':>5}"
                )

    # Top experiments
    report.append("\n" + "-" * 60)
    report.append("TOP 10 EXPERIMENTS BY F1")
    report.append("-" * 60)
    sorted_exps = sorted(experiments, key=lambda x: x["f1"], reverse=True)
    for exp in sorted_exps[:10]:
        kept_str = "✓" if exp["kept"] else "✗"
        report.append(f" {kept_str} F1={exp['f1']:.4f} {exp['desc'][:60]}")

    # Kept improvements timeline
    report.append("\n" + "-" * 60)
    report.append("IMPROVEMENT TIMELINE (kept experiments only)")
    report.append("-" * 60)
    for exp in experiments:
        if exp["kept"]:
            report.append(f" #{exp['id']}: F1={exp['f1']:.4f} {exp['desc'][:60]}")

    output = "\n".join(report)
    print(output)

    # The report contains non-ASCII characters (→, Δ, ✓, ✗), so write it as
    # UTF-8 rather than relying on the locale's default encoding.
    with open("transfer_summary.txt", "w", encoding="utf-8") as f:
        f.write(output)
    print("\nSaved to transfer_summary.txt")


if __name__ == "__main__":
    main()