timmers committed on
Commit
a67c030
·
verified ·
1 Parent(s): d19dbd4

Upload genome_to_disease.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. genome_to_disease.py +12 -10
genome_to_disease.py CHANGED
@@ -18,7 +18,7 @@ import json, os, sys
18
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
19
  from rare_disease_kg import load_kg
20
  from pillar_a_kg_proposer import propose
21
- from genomic_scorer import top_pathogenic_variant, pathogenicity
22
 
23
  # cohort causal gene → ground-truth disease (ORPHA)
24
  GENE_DISEASE = {
@@ -36,15 +36,16 @@ def genome_to_disease(kg, gene, variant_path, k=3):
36
  return out.get("diseases", [])
37
 
38
 
39
- def _demo(online=True):
40
  kg, src = load_kg()
41
  print("=" * 82)
42
  print(f"GEMEO — genome → disease [variant data: real AlphaMissense+ClinVar | KG: {src}]")
43
  print("=" * 82)
44
  hits1, n, results = 0, 0, {}
45
  for gene, truth in GENE_DISEASE.items():
46
- v = top_pathogenic_variant(gene, online=online)
47
- if not v:
 
48
  continue
49
  path = v["alphamissense"]
50
  diseases = genome_to_disease(kg, gene, path, k=3)
@@ -60,16 +61,17 @@ def _demo(online=True):
60
  print(f" → genome→disease #1: {top} ({kg.names.get(top, top)}) {flag}")
61
  print(f" evidence: {diseases[0]['evidence']}")
62
 
63
- # benign control: a benign variant should NOT score as pathogenic
64
- b = top_pathogenic_variant("FBN1_benign", online=False)
65
- print(f"\n [control] FBN1 benign variant {b['variant']} AlphaMissense={b['alphamissense']:.3f} "
66
- f"({b['clinvar']}) pathogenic? {b['alphamissense'] >= PATHO_THRESH} (correctly NO)")
 
67
 
68
  acc = hits1 / n if n else 0.0
69
  print(f"\n genome→disease Top-1 recovery: {hits1}/{n} = {acc*100:.0f}% "
70
  f"(real pathogenic variant → correct rare disease)")
71
  res = {"top1_recovery": acc, "n": n, "patho_threshold": PATHO_THRESH,
72
- "benign_control_pathogenic": b["alphamissense"] >= PATHO_THRESH, "per_gene": results}
73
  json.dump(res, open("/tmp/genome_to_disease_demo.json", "w"), indent=2)
74
  print(" Saved /tmp/genome_to_disease_demo.json")
75
  print("\n Next (Mayo substrate): condition the world model (Pillar B) on this genomic")
@@ -78,4 +80,4 @@ def _demo(online=True):
78
 
79
 
80
  if __name__ == "__main__":
81
- _demo(online="--offline" not in sys.argv)
 
18
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
19
  from rare_disease_kg import load_kg
20
  from pillar_a_kg_proposer import propose
21
+ from genomic_scorer import top_pathogenic_variant, pathogenicity, VEP_VALIDATION
22
 
23
  # cohort causal gene → ground-truth disease (ORPHA)
24
  GENE_DISEASE = {
 
36
  return out.get("diseases", [])
37
 
38
 
39
+ def _demo():
40
  kg, src = load_kg()
41
  print("=" * 82)
42
  print(f"GEMEO — genome → disease [variant data: real AlphaMissense+ClinVar | KG: {src}]")
43
  print("=" * 82)
44
  hits1, n, results = 0, 0, {}
45
  for gene, truth in GENE_DISEASE.items():
46
+ v = top_pathogenic_variant(gene)
47
+ if not v or v.get("alphamissense") is None:
48
+ print(f" {gene}: no live variant data (offline?) — skipped")
49
  continue
50
  path = v["alphamissense"]
51
  diseases = genome_to_disease(kg, gene, path, k=3)
 
61
  print(f" → genome→disease #1: {top} ({kg.names.get(top, top)}) {flag}")
62
  print(f" evidence: {diseases[0]['evidence']}")
63
 
64
+ # discrimination is validated separately on 30 real ClinVar variants:
65
+ print(f"\n [validation] variant-effect AUROC on {VEP_VALIDATION['n']} real ClinVar "
66
+ f"{VEP_VALIDATION['gene']} variants (P vs B): "
67
+ f"AlphaMissense {VEP_VALIDATION['alphamissense_auroc']} · Evo 2 7b {VEP_VALIDATION['evo2_7b_auroc']} "
68
+ f"(benchmark/results/evo2_vep_fbn1.json)")
69
 
70
  acc = hits1 / n if n else 0.0
71
  print(f"\n genome→disease Top-1 recovery: {hits1}/{n} = {acc*100:.0f}% "
72
  f"(real pathogenic variant → correct rare disease)")
73
  res = {"top1_recovery": acc, "n": n, "patho_threshold": PATHO_THRESH,
74
+ "vep_validation": VEP_VALIDATION, "per_gene": results}
75
  json.dump(res, open("/tmp/genome_to_disease_demo.json", "w"), indent=2)
76
  print(" Saved /tmp/genome_to_disease_demo.json")
77
  print("\n Next (Mayo substrate): condition the world model (Pillar B) on this genomic")
 
80
 
81
 
82
  if __name__ == "__main__":
83
+ _demo()