"""af_handler.py — PeVe v1.1"""
from __future__ import annotations

import json
import urllib.request
from dataclasses import dataclass, field
from typing import Optional

from config import AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT, GNOMAD_VERSION

# Allele-frequency states stored in ``AFResult.state`` (assigned by fetch_af).
AF_NUMERIC = "AF_NUMERIC"      # a non-zero global AF was returned
AF_ZERO = "AF_ZERO"            # global AF is 0 and coverage was reported OK
AF_UNKNOWN = "AF_UNKNOWN"      # no AF available (query failed / variant absent / AF missing)
AF_UNCERTAIN = "AF_UNCERTAIN"  # global AF is 0 but coverage was reported insufficient

# Founder-variant heuristic used by fetch_af: a sub-population is flagged when
# its AF exceeds both an absolute floor and a fold-enrichment over global AF.
_FOUNDER_MIN_SUBPOP_AF = 0.005
_FOUNDER_MIN_FOLD = 10

_GNOMAD_API_URL = "https://gnomad.broadinstitute.org/api"
_GNOMAD_DATASET = "gnomad_r4"
_GNOMAD_TIMEOUT_S = 15

# GraphQL query sent by _query_gnomad. Variable names must match the keys of
# the "variables" object built there (variantId / datasetId).
_GQL = (
    "query V($variantId:String!,$datasetId:DatasetId!)"
    "{variant(variantId:$variantId,dataset:$datasetId)"
    "{genome{af populations{id af}}}}"
)


@dataclass
class AFResult:
    """Result of an allele-frequency lookup for a single variant.

    ``state`` is one of the module-level ``AF_*`` constants; ``flags`` collects
    human-readable notes produced while building the result.
    """

    state: str
    global_af: Optional[float]
    subpop_afs: dict
    is_rare: Optional[bool]
    founder_variant_flag: bool
    stratification_warning: Optional[str]
    gnomad_version: str
    flags: list = field(default_factory=list)

    def satisfies_rarity(self) -> bool:
        """Return True when the result supports rarity.

        A numeric AF must be strictly below ``AF_RARITY_THRESHOLD``; a
        confirmed zero always qualifies; every other state does not.
        """
        if self.state == AF_NUMERIC and self.global_af is not None:
            return self.global_af < AF_RARITY_THRESHOLD
        if self.state == AF_ZERO:
            return True
        return False

    def triggers_high_af_conflict(self) -> bool:
        """Return True when a numeric AF is strictly above ``AF_HIGH_CONFLICT``."""
        return (
            self.state == AF_NUMERIC
            and self.global_af is not None
            and self.global_af > AF_HIGH_CONFLICT
        )


def _unknown_result(flags):
    """Build the AF_UNKNOWN result returned when no frequency data is usable."""
    return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, flags)


def _founder_warning(global_af, subpop_afs):
    """Return a stratification warning for the first enriched sub-population.

    Returns None when the global AF is missing/zero or no sub-population
    passes both founder thresholds. Only the first match (in dict iteration
    order) is reported.
    """
    if not global_af or global_af <= 0:
        return None
    for pop, paf in subpop_afs.items():
        if paf > _FOUNDER_MIN_SUBPOP_AF and (paf / global_af) > _FOUNDER_MIN_FOLD:
            return (
                f"Possible founder variant: AF in '{pop}' ({paf:.5f}) is "
                f"{paf/global_af:.0f}x global AF. Use ancestry-matched AF."
            )
    return None


def fetch_af(chrom, pos, ref, alt, ancestry=None):
    """Look up a variant in gnomAD and classify its allele frequency.

    Args:
        chrom, pos, ref, alt: Variant coordinates; joined as
            ``"{chrom}-{pos}-{ref}-{alt}"`` to form the gnomAD variant ID.
        ancestry: Accepted for interface compatibility; not currently used by
            this function.

    Returns:
        An ``AFResult``. Query failures and absent variants never raise: they
        yield an ``AF_UNKNOWN`` result with an explanatory entry in ``flags``.
    """
    vid = f"{chrom}-{pos}-{ref}-{alt}"
    flags = []

    # Deliberately broad: any lookup failure degrades to AF_UNKNOWN rather
    # than aborting the caller.
    try:
        data = _query_gnomad(vid)
    except Exception as exc:
        flags.append(f"gnomAD query failed: {exc}")
        return _unknown_result(flags)

    if data is None:
        flags.append("Variant absent from gnomAD — treated as AF_UNKNOWN.")
        return _unknown_result(flags)

    global_af = data.get("af")
    subpop_afs = data.get("populations", {})
    coverage_ok = data.get("coverage_ok", True)

    if global_af is None:
        state = AF_UNKNOWN
    elif global_af == 0.0:
        # A zero is only trusted as "absent" when coverage was adequate.
        if coverage_ok:
            state = AF_ZERO
        else:
            state = AF_UNCERTAIN
            flags.append(
                "AF=0 but coverage insufficient — AF_UNCERTAIN, rarity NOT confirmed."
            )
    else:
        state = AF_NUMERIC

    strat_warn = _founder_warning(global_af, subpop_afs)
    founder_flag = strat_warn is not None
    if founder_flag:
        flags.append(strat_warn)

    if state == AF_NUMERIC:
        is_rare = global_af < AF_RARITY_THRESHOLD
    elif state == AF_ZERO:
        is_rare = True
    else:
        is_rare = None  # rarity cannot be decided without a trusted AF

    return AFResult(
        state,
        global_af,
        subpop_afs,
        is_rare,
        founder_flag,
        strat_warn,
        GNOMAD_VERSION,
        flags,
    )


def _raise_on_api_errors(errors):
    """Raise RuntimeError for GraphQL errors other than a 'not found' message.

    ``errors`` is the optional top-level ``errors`` list of a GraphQL
    response. Messages containing "not found" are treated as a plain absent
    variant (NOTE(review): assumes the gnomAD API words a missing variant this
    way — confirm against the API) and do not raise.
    """
    messages = [
        str(err.get("message", err)) if isinstance(err, dict) else str(err)
        for err in (errors or [])
    ]
    unexpected = [msg for msg in messages if "not found" not in msg.lower()]
    if unexpected:
        raise RuntimeError("gnomAD API error: " + "; ".join(unexpected))


def _query_gnomad(vid):
    """POST a GraphQL variant query to the gnomAD API.

    Args:
        vid: Variant ID string in ``chrom-pos-ref-alt`` form.

    Returns:
        ``None`` when the response carries no variant, otherwise a dict with
        keys ``"af"`` (genome AF, may be None), ``"populations"`` (lower-cased
        population id -> AF, entries with a null AF dropped) and
        ``"coverage_ok"``.

    Raises:
        RuntimeError: the response has no variant and reports a GraphQL error
            other than "not found".
        Exception: network / decoding errors from ``urllib`` or ``json``
            propagate unchanged.
    """
    payload = json.dumps({
        "query": _GQL,
        "variables": {"variantId": vid, "datasetId": _GNOMAD_DATASET},
    }).encode()
    req = urllib.request.Request(
        _GNOMAD_API_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=_GNOMAD_TIMEOUT_S) as r:
        body = json.loads(r.read())

    # GraphQL servers may send `"data": null` on failure; `or {}` keeps the
    # lookup from failing with an opaque AttributeError.
    vdata = (body.get("data") or {}).get("variant")
    if not vdata:
        # Do not report an API-level failure as "variant absent".
        _raise_on_api_errors(body.get("errors"))
        return None

    genome = vdata.get("genome") or {}
    pops = {
        p["id"].lower(): p["af"]
        for p in (genome.get("populations") or [])
        if p.get("af") is not None
    }
    # Coverage is not queried here, so it is always reported as OK; fetch_af
    # therefore cannot reach AF_UNCERTAIN through this code path.
    return {"af": genome.get("af"), "populations": pops, "coverage_ok": True}


def format_af_display(af_result):
    """Return a short display string for an ``AFResult``.

    A numeric result is shown with six decimals. A result marked AF_NUMERIC
    but carrying no ``global_af`` is displayed as unknown instead of raising.
    """
    if af_result.state == AF_NUMERIC and af_result.global_af is not None:
        return f"{af_result.global_af:.6f}"
    if af_result.state == AF_ZERO:
        return "0 (confirmed absent)"
    if af_result.state == AF_UNCERTAIN:
        return "0 (⚠ AF_UNCERTAIN — coverage insufficient)"
    return "Not in gnomAD (AF_UNKNOWN)"