Upload genome_to_disease.py with huggingface_hub
Browse files
- genome_to_disease.py +12 -10
genome_to_disease.py
CHANGED
|
@@ -18,7 +18,7 @@ import json, os, sys
|
|
| 18 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 19 |
from rare_disease_kg import load_kg
|
| 20 |
from pillar_a_kg_proposer import propose
|
| 21 |
-
from genomic_scorer import top_pathogenic_variant, pathogenicity
|
| 22 |
|
| 23 |
# cohort causal gene → ground-truth disease (ORPHA)
|
| 24 |
GENE_DISEASE = {
|
|
@@ -36,15 +36,16 @@ def genome_to_disease(kg, gene, variant_path, k=3):
|
|
| 36 |
return out.get("diseases", [])
|
| 37 |
|
| 38 |
|
| 39 |
-
def _demo(online=True):
|
| 40 |
kg, src = load_kg()
|
| 41 |
print("=" * 82)
|
| 42 |
print(f"GEMEO — genome → disease [variant data: real AlphaMissense+ClinVar | KG: {src}]")
|
| 43 |
print("=" * 82)
|
| 44 |
hits1, n, results = 0, 0, {}
|
| 45 |
for gene, truth in GENE_DISEASE.items():
|
| 46 |
-
v = top_pathogenic_variant(gene
|
| 47 |
-
if not v:
|
|
|
|
| 48 |
continue
|
| 49 |
path = v["alphamissense"]
|
| 50 |
diseases = genome_to_disease(kg, gene, path, k=3)
|
|
@@ -60,16 +61,17 @@ def _demo(online=True):
|
|
| 60 |
print(f" → genome→disease #1: {top} ({kg.names.get(top, top)}) {flag}")
|
| 61 |
print(f" evidence: {diseases[0]['evidence']}")
|
| 62 |
|
| 63 |
-
#
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
f"
|
|
|
|
| 67 |
|
| 68 |
acc = hits1 / n if n else 0.0
|
| 69 |
print(f"\n genome→disease Top-1 recovery: {hits1}/{n} = {acc*100:.0f}% "
|
| 70 |
f"(real pathogenic variant → correct rare disease)")
|
| 71 |
res = {"top1_recovery": acc, "n": n, "patho_threshold": PATHO_THRESH,
|
| 72 |
-
"
|
| 73 |
json.dump(res, open("/tmp/genome_to_disease_demo.json", "w"), indent=2)
|
| 74 |
print(" Saved /tmp/genome_to_disease_demo.json")
|
| 75 |
print("\n Next (Mayo substrate): condition the world model (Pillar B) on this genomic")
|
|
@@ -78,4 +80,4 @@ def _demo(online=True):
|
|
| 78 |
|
| 79 |
|
| 80 |
if __name__ == "__main__":
|
| 81 |
-
_demo(
|
|
|
|
| 18 |
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 19 |
from rare_disease_kg import load_kg
|
| 20 |
from pillar_a_kg_proposer import propose
|
| 21 |
+
from genomic_scorer import top_pathogenic_variant, pathogenicity, VEP_VALIDATION
|
| 22 |
|
| 23 |
# cohort causal gene → ground-truth disease (ORPHA)
|
| 24 |
GENE_DISEASE = {
|
|
|
|
| 36 |
return out.get("diseases", [])
|
| 37 |
|
| 38 |
|
| 39 |
+
def _demo():
|
| 40 |
kg, src = load_kg()
|
| 41 |
print("=" * 82)
|
| 42 |
print(f"GEMEO — genome → disease [variant data: real AlphaMissense+ClinVar | KG: {src}]")
|
| 43 |
print("=" * 82)
|
| 44 |
hits1, n, results = 0, 0, {}
|
| 45 |
for gene, truth in GENE_DISEASE.items():
|
| 46 |
+
v = top_pathogenic_variant(gene)
|
| 47 |
+
if not v or v.get("alphamissense") is None:
|
| 48 |
+
print(f" {gene}: no live variant data (offline?) — skipped")
|
| 49 |
continue
|
| 50 |
path = v["alphamissense"]
|
| 51 |
diseases = genome_to_disease(kg, gene, path, k=3)
|
|
|
|
| 61 |
print(f" → genome→disease #1: {top} ({kg.names.get(top, top)}) {flag}")
|
| 62 |
print(f" evidence: {diseases[0]['evidence']}")
|
| 63 |
|
| 64 |
+
# discrimination is validated separately on 30 real ClinVar variants:
|
| 65 |
+
print(f"\n [validation] variant-effect AUROC on {VEP_VALIDATION['n']} real ClinVar "
|
| 66 |
+
f"{VEP_VALIDATION['gene']} variants (P vs B): "
|
| 67 |
+
f"AlphaMissense {VEP_VALIDATION['alphamissense_auroc']} · Evo 2 7b {VEP_VALIDATION['evo2_7b_auroc']} "
|
| 68 |
+
f"(benchmark/results/evo2_vep_fbn1.json)")
|
| 69 |
|
| 70 |
acc = hits1 / n if n else 0.0
|
| 71 |
print(f"\n genome→disease Top-1 recovery: {hits1}/{n} = {acc*100:.0f}% "
|
| 72 |
f"(real pathogenic variant → correct rare disease)")
|
| 73 |
res = {"top1_recovery": acc, "n": n, "patho_threshold": PATHO_THRESH,
|
| 74 |
+
"vep_validation": VEP_VALIDATION, "per_gene": results}
|
| 75 |
json.dump(res, open("/tmp/genome_to_disease_demo.json", "w"), indent=2)
|
| 76 |
print(" Saved /tmp/genome_to_disease_demo.json")
|
| 77 |
print("\n Next (Mayo substrate): condition the world model (Pillar B) on this genomic")
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
if __name__ == "__main__":
|
| 83 |
+
_demo()
|