# Mutation_XAI / af_handler.py
# nileshhanotia's picture
# af_handler.py
# 69a93da verified
"""af_handler.py — PeVe v1.1"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
import urllib.request, json
from config import AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT, GNOMAD_VERSION
# Allele-frequency states stored in AFResult.state (assigned by fetch_af).
# AF_NUMERIC: the lookup returned an AF value that is neither None nor 0.
AF_NUMERIC = "AF_NUMERIC"
# AF_ZERO: the lookup returned AF == 0 and coverage was reported as sufficient.
AF_ZERO = "AF_ZERO"
# AF_UNKNOWN: the query failed, the variant was absent, or no AF value came back.
AF_UNKNOWN = "AF_UNKNOWN"
# AF_UNCERTAIN: the lookup returned AF == 0 but coverage was flagged insufficient.
AF_UNCERTAIN = "AF_UNCERTAIN"
@dataclass
class AFResult:
    """Outcome of an allele-frequency lookup for a single variant.

    Field order matters: instances are built positionally by the code that
    performs the lookup.
    """

    # One of the AF_* state constants.
    state: str
    # Global allele frequency, or None when no value is available.
    global_af: Optional[float]
    # Per-population allele frequencies keyed by population id.
    subpop_afs: dict
    # True/False when rarity could be decided, None otherwise.
    is_rare: Optional[bool]
    # Set when one population's AF is far above the global AF.
    founder_variant_flag: bool
    # Human-readable stratification warning, if any.
    stratification_warning: Optional[str]
    # gnomAD release label recorded with the result.
    gnomad_version: str
    # Free-text notes accumulated during the lookup.
    flags: list = field(default_factory=list)

    def satisfies_rarity(self):
        """Return whether the variant counts as rare.

        AF_ZERO is always rare; AF_NUMERIC is rare when the global AF is
        below AF_RARITY_THRESHOLD; every other state is not rare.
        """
        if self.state == AF_ZERO:
            return True
        has_numeric_af = self.state == AF_NUMERIC and self.global_af is not None
        return has_numeric_af and self.global_af < AF_RARITY_THRESHOLD

    def triggers_high_af_conflict(self):
        """Return whether a numeric global AF exceeds AF_HIGH_CONFLICT."""
        if self.state != AF_NUMERIC or self.global_af is None:
            return False
        return self.global_af > AF_HIGH_CONFLICT
def fetch_af(chrom, pos, ref, alt, ancestry=None):
    """Look up a variant in gnomAD and classify its allele frequency.

    Args:
        chrom, pos, ref, alt: Variant coordinates; joined with "-" to form
            the variant id passed to the gnomAD query.
        ancestry: Accepted for interface compatibility; not used here.

    Returns:
        An AFResult. Query failures and absent variants yield AF_UNKNOWN
        with an explanatory entry in ``flags`` rather than an exception.
    """
    notes = []
    variant_id = f"{chrom}-{pos}-{ref}-{alt}"

    def _unknown_result():
        # Shared shape for "no usable frequency" outcomes.
        return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, notes)

    try:
        record = _query_gnomad(variant_id)
    except Exception as exc:
        notes.append(f"gnomAD query failed: {exc}")
        return _unknown_result()
    if record is None:
        notes.append("Variant absent from gnomAD — treated as AF_UNKNOWN.")
        return _unknown_result()

    global_af = record.get("af")
    subpop_afs = record.get("populations", {})
    coverage_ok = record.get("coverage_ok", True)

    # Classify the global frequency into one of the AF_* states.
    if global_af is None:
        state = AF_UNKNOWN
    elif global_af == 0.0:
        if coverage_ok:
            state = AF_ZERO
        else:
            state = AF_UNCERTAIN
            notes.append("AF=0 but coverage insufficient — AF_UNCERTAIN, rarity NOT confirmed.")
    else:
        state = AF_NUMERIC

    # Founder-variant heuristic: first population whose AF is above 0.005
    # and more than 10x the global AF.
    enriched = None
    if global_af and global_af > 0:
        enriched = next(
            (
                (pop, paf)
                for pop, paf in subpop_afs.items()
                if paf > 0 and (paf / global_af) > 10 and paf > 0.005
            ),
            None,
        )
    founder_flag = enriched is not None
    strat_warn = None
    if founder_flag:
        pop, paf = enriched
        strat_warn = (f"Possible founder variant: AF in '{pop}' ({paf:.5f}) is "
                      f"{paf/global_af:.0f}x global AF. Use ancestry-matched AF.")
        notes.append(strat_warn)

    # Rarity is decided only for numeric or confirmed-zero frequencies.
    if state == AF_NUMERIC and global_af is not None:
        is_rare = global_af < AF_RARITY_THRESHOLD
    elif state == AF_ZERO:
        is_rare = True
    else:
        is_rare = None

    return AFResult(state, global_af, subpop_afs, is_rare,
                    founder_flag, strat_warn, GNOMAD_VERSION, notes)
# GraphQL query for one variant's genome allele frequency plus the
# per-population breakdown. This is the query the request actually sends.
_GQL = (
    "query V($variantId:String!,$datasetId:DatasetId!){"
    "variant(variantId:$variantId,dataset:$datasetId){"
    "genome{af populations{id af}}}}"
)


def _query_gnomad(vid, api_url="https://gnomad.broadinstitute.org/api",
                  dataset="gnomad_r4", timeout=15):
    """POST a variant lookup to a gnomAD GraphQL endpoint and parse the reply.

    Args:
        vid: Variant id string sent as the ``variantId`` query variable.
        api_url: Endpoint to query; defaults to the public gnomAD API.
        dataset: Value sent as the ``datasetId`` query variable.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        None when the response carries no variant record; otherwise a dict
        with keys ``"af"`` (genome AF, possibly None), ``"populations"``
        (lower-cased population id -> AF, entries without an AF dropped) and
        ``"coverage_ok"``.

    Raises:
        RuntimeError: The response has no ``data`` payload but does carry
            GraphQL ``errors``; a failed query is not evidence of absence.
        Exception: Whatever ``urlopen`` / ``json.loads`` raise on transport
            or decoding failures.
    """
    payload = json.dumps({
        "query": _GQL,
        "variables": {"variantId": vid, "datasetId": dataset},
    }).encode()
    req = urllib.request.Request(
        api_url,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as r:
        body = json.loads(r.read())

    # "data" may be missing or explicitly null when the server reports errors.
    data = body.get("data")
    if not data:
        errors = body.get("errors")
        if errors:
            detail = "; ".join(
                str(err.get("message", err)) if isinstance(err, dict) else str(err)
                for err in errors
            )
            raise RuntimeError(f"gnomAD API error: {detail}")
        return None

    vdata = data.get("variant")
    if not vdata:
        return None
    genome = vdata.get("genome") or {}
    pops = {
        p["id"].lower(): p["af"]
        for p in (genome.get("populations") or [])
        if p.get("af") is not None
    }
    # Coverage is not queried here, so it is always reported as sufficient.
    return {"af": genome.get("af"), "populations": pops, "coverage_ok": True}
def format_af_display(af_result):
    """Render an AFResult's allele frequency as a short display string.

    Numeric frequencies are shown with six decimal places; the zero and
    uncertain states get fixed labels; anything else is shown as unknown.
    """
    state = af_result.state
    if state == AF_NUMERIC:
        return format(af_result.global_af, ".6f")
    fixed_labels = (
        (AF_ZERO, "0 (confirmed absent)"),
        (AF_UNCERTAIN, "0 (⚠ AF_UNCERTAIN — coverage insufficient)"),
    )
    for known_state, label in fixed_labels:
        if state == known_state:
            return label
    return "Not in gnomAD (AF_UNKNOWN)"