Spaces:
Runtime error
Runtime error
af_handler.py
Browse files- af_handler.py +108 -0
af_handler.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""af_handler.py — PeVe v1.1"""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Optional
|
| 5 |
+
import urllib.request, json
|
| 6 |
+
from config import AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT, GNOMAD_VERSION
|
| 7 |
+
|
| 8 |
+
# Allele-frequency states assigned by fetch_af():
#   AF_NUMERIC   - gnomAD returned a non-zero global AF.
#   AF_ZERO      - global AF is exactly 0 and coverage is reported as adequate.
#   AF_UNKNOWN   - the query failed, the variant is absent, or no AF value came back.
#   AF_UNCERTAIN - global AF is 0 but coverage is reported as insufficient.
AF_NUMERIC = "AF_NUMERIC"
|
| 9 |
+
AF_ZERO = "AF_ZERO"
|
| 10 |
+
AF_UNKNOWN = "AF_UNKNOWN"
|
| 11 |
+
AF_UNCERTAIN = "AF_UNCERTAIN"
|
| 12 |
+
|
| 13 |
+
@dataclass
class AFResult:
    """Outcome of an allele-frequency lookup for a single variant.

    Instances are built positionally elsewhere in this module, so the field
    order below is part of the interface.

    Attributes:
        state: One of the module-level ``AF_*`` state strings.
        global_af: Global allele frequency, or ``None`` when unavailable.
        subpop_afs: Mapping of population id to allele frequency.
        is_rare: Rarity verdict; ``None`` when it could not be decided.
        founder_variant_flag: ``True`` when a sub-population AF stands out
            strongly against the global AF.
        stratification_warning: Human-readable text accompanying the founder
            flag, or ``None``.
        gnomad_version: Version label of the gnomAD release in use.
        flags: Free-text notes collected while producing the result.
    """

    state: str
    global_af: Optional[float]
    subpop_afs: dict
    is_rare: Optional[bool]
    founder_variant_flag: bool
    stratification_warning: Optional[str]
    gnomad_version: str
    flags: list = field(default_factory=list)

    def satisfies_rarity(self):
        """Return whether this result supports treating the variant as rare.

        A confirmed-zero AF always qualifies; a numeric AF qualifies when it
        is below ``AF_RARITY_THRESHOLD``; every other state does not.
        """
        if self.state == AF_ZERO:
            return True
        if self.state != AF_NUMERIC or self.global_af is None:
            return False
        return self.global_af < AF_RARITY_THRESHOLD

    def triggers_high_af_conflict(self):
        """Return whether a numeric global AF exceeds ``AF_HIGH_CONFLICT``."""
        if self.state != AF_NUMERIC or self.global_af is None:
            return False
        return self.global_af > AF_HIGH_CONFLICT
|
| 33 |
+
|
| 34 |
+
def fetch_af(chrom, pos, ref, alt, ancestry=None):
    """Fetch allele-frequency data for a variant and classify it.

    Args:
        chrom: Chromosome component of the variant id.
        pos: Position component of the variant id.
        ref: Reference allele.
        alt: Alternate allele.
        ancestry: Accepted for interface compatibility; not used here.

    Returns:
        An ``AFResult``. Query failures and absent variants both yield an
        ``AF_UNKNOWN`` result with an explanatory entry in ``flags`` rather
        than raising.
    """
    variant_id = f"{chrom}-{pos}-{ref}-{alt}"
    notes = []

    def _unknown_result():
        # Shared shape for "we could not learn anything about this variant".
        return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, notes)

    try:
        record = _query_gnomad(variant_id)
    except Exception as exc:
        notes.append(f"gnomAD query failed: {exc}")
        return _unknown_result()

    if record is None:
        notes.append("Variant absent from gnomAD — treated as AF_UNKNOWN.")
        return _unknown_result()

    overall_af = record.get("af")
    per_population = record.get("populations", {})
    well_covered = record.get("coverage_ok", True)

    # Classify the global AF into one of the AF_* states.
    if overall_af is None:
        af_state = AF_UNKNOWN
    elif overall_af == 0.0:
        if well_covered:
            af_state = AF_ZERO
        else:
            af_state = AF_UNCERTAIN
            notes.append("AF=0 but coverage insufficient — AF_UNCERTAIN, rarity NOT confirmed.")
    else:
        af_state = AF_NUMERIC

    # Founder-variant screen: first population whose AF is above 0.005 and
    # more than 10x the global AF wins; later populations are not examined.
    founder_detected = False
    stratification_note = None
    if overall_af and overall_af > 0:
        first_hit = next(
            (
                (pop, paf)
                for pop, paf in per_population.items()
                if paf > 0 and (paf / overall_af) > 10 and paf > 0.005
            ),
            None,
        )
        if first_hit is not None:
            pop, paf = first_hit
            founder_detected = True
            stratification_note = (
                f"Possible founder variant: AF in '{pop}' ({paf:.5f}) is "
                f"{paf/overall_af:.0f}x global AF. Use ancestry-matched AF."
            )
            notes.append(stratification_note)

    # Rarity verdict: numeric AFs are compared to the threshold, a confirmed
    # zero is rare, anything else stays undecided.
    if af_state == AF_NUMERIC and overall_af is not None:
        rare = overall_af < AF_RARITY_THRESHOLD
    elif af_state == AF_ZERO:
        rare = True
    else:
        rare = None

    return AFResult(
        af_state,
        overall_af,
        per_population,
        rare,
        founder_detected,
        stratification_note,
        GNOMAD_VERSION,
        notes,
    )
|
| 75 |
+
|
| 76 |
+
# GraphQL query template requesting a variant's genome AF and per-population AFs.
# NOTE(review): _query_gnomad builds a payload from this template and then
# immediately overwrites it with an inline query, so this constant currently
# has no effect on the request that is sent.
_GQL = """
|
| 77 |
+
query V($vid:String!,$ds:DatasetId!){variant(variantId:$vid,dataset:$ds){
|
| 78 |
+
genome{af populations{id af}}}}
|
| 79 |
+
"""
|
| 80 |
+
|
| 81 |
+
def _query_gnomad(vid):
    """Query the gnomAD GraphQL API for one variant's genome allele frequencies.

    Args:
        vid: Variant identifier string sent as the ``variantId`` variable.

    Returns:
        ``None`` when the response contains no variant record. Otherwise a
        dict with keys ``"af"`` (genome AF, possibly ``None``),
        ``"populations"`` (lower-cased population id -> AF, entries without
        an AF dropped) and ``"coverage_ok"`` (always ``True`` here; coverage
        is not queried).

    Raises:
        RuntimeError: The server answered with ``"data": null``, i.e. the
            request itself was rejected; the message carries the server's
            error details.
        urllib.error.URLError: The HTTP request failed or timed out.
        ValueError: The response body was not valid JSON.
    """
    query = (
        "query V($variantId:String!,$datasetId:DatasetId!)"
        "{variant(variantId:$variantId,dataset:$datasetId)"
        "{genome{af populations{id af}}}}"
    )
    payload = json.dumps({
        "query": query,
        "variables": {"variantId": vid, "datasetId": "gnomad_r4"},
    }).encode()
    req = urllib.request.Request(
        "https://gnomad.broadinstitute.org/api",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=15) as r:
        body = json.loads(r.read())

    # A GraphQL response may carry an explicit null "data" next to an
    # "errors" list. Surface that as a descriptive failure instead of an
    # AttributeError on None. A missing "data" key keeps the old behaviour.
    data = body.get("data", {})
    if data is None:
        errors = body.get("errors") or []
        detail = "; ".join(
            str(err.get("message", err)) if isinstance(err, dict) else str(err)
            for err in errors
        )
        raise RuntimeError(
            f"gnomAD API returned no data: {detail or 'no error details'}"
        )

    vdata = data.get("variant")
    if not vdata:
        return None

    genome = vdata.get("genome") or {}
    pops = {
        p["id"].lower(): p["af"]
        for p in (genome.get("populations") or [])
        if p.get("af") is not None
    }
    return {"af": genome.get("af"), "populations": pops, "coverage_ok": True}
|
| 100 |
+
|
| 101 |
+
def format_af_display(af_result):
    """Render an ``AFResult`` as a short display string.

    Numeric results show the global AF to six decimal places; the zero and
    uncertain states map to fixed labels; anything else is reported as not
    present in gnomAD.
    """
    current = af_result.state
    if current == AF_NUMERIC:
        return f"{af_result.global_af:.6f}"

    fixed_labels = (
        (AF_ZERO, "0 (confirmed absent)"),
        (AF_UNCERTAIN, "0 (⚠ AF_UNCERTAIN — coverage insufficient)"),
    )
    for candidate, label in fixed_labels:
        if current == candidate:
            return label
    return "Not in gnomAD (AF_UNKNOWN)"
|