nileshhanotia committed on
Commit
69a93da
·
verified ·
1 Parent(s): bc143c3

af_handler.py

Browse files
Files changed (1) hide show
  1. af_handler.py +108 -0
af_handler.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """af_handler.py — PeVe v1.1"""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional
5
+ import urllib.request, json
6
+ from config import AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT, GNOMAD_VERSION
7
+
8
# State tags stored in AFResult.state; fetch_af picks exactly one per lookup.
# A non-zero allele frequency was returned.
AF_NUMERIC = "AF_NUMERIC"
# Allele frequency is exactly 0 and coverage is reported as sufficient.
AF_ZERO = "AF_ZERO"
# The query failed, the variant is absent, or the record carries no AF.
AF_UNKNOWN = "AF_UNKNOWN"
# Allele frequency is 0 but coverage is reported as insufficient.
AF_UNCERTAIN = "AF_UNCERTAIN"
12
+
13
@dataclass
class AFResult:
    """Outcome of one allele-frequency lookup, plus rarity helpers."""

    # One of the module-level AF_* state tags.
    state: str
    # Global allele frequency, or None when no value is available.
    global_af: Optional[float]
    # Per-population allele frequencies keyed by population id.
    subpop_afs: dict
    # True/False when rarity could be decided, None otherwise.
    is_rare: Optional[bool]
    # True when a subpopulation looked strongly enriched over the global AF.
    founder_variant_flag: bool
    # Human-readable warning that accompanies founder_variant_flag, if any.
    stratification_warning: Optional[str]
    # gnomAD version label recorded with the result.
    gnomad_version: str
    # Free-text notes collected while building the result.
    flags: list = field(default_factory=list)

    def satisfies_rarity(self):
        """Return True when the variant counts as rare.

        A confirmed-zero frequency is always rare; a numeric frequency is
        rare when it is below AF_RARITY_THRESHOLD; every other state is not.
        """
        if self.state == AF_ZERO:
            return True
        has_numeric_af = self.state == AF_NUMERIC and self.global_af is not None
        return has_numeric_af and self.global_af < AF_RARITY_THRESHOLD

    def triggers_high_af_conflict(self):
        """Return True when a numeric global AF exceeds AF_HIGH_CONFLICT."""
        if self.state != AF_NUMERIC or self.global_af is None:
            return False
        return self.global_af > AF_HIGH_CONFLICT
33
+
34
# Founder-variant heuristic: a subpopulation is reported when its AF is above
# _FOUNDER_MIN_SUBPOP_AF and more than _FOUNDER_ENRICHMENT_RATIO times the
# global AF.
_FOUNDER_ENRICHMENT_RATIO = 10
_FOUNDER_MIN_SUBPOP_AF = 0.005


def _unknown_af_result(flags):
    """Build the AF_UNKNOWN result shared by the failure and absent paths."""
    return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, flags)


def _classify_af_state(global_af, coverage_ok, flags):
    """Map a global AF to an AF_* tag, flagging a zero that coverage cannot confirm."""
    if global_af is None:
        return AF_UNKNOWN
    if global_af != 0.0:
        return AF_NUMERIC
    if coverage_ok:
        return AF_ZERO
    flags.append("AF=0 but coverage insufficient — AF_UNCERTAIN, rarity NOT confirmed.")
    return AF_UNCERTAIN


def _founder_warning(global_af, subpop_afs):
    """Return a warning for the first strongly enriched subpopulation, else None."""
    if not (global_af and global_af > 0):
        # No positive global AF to compare against (also avoids dividing by 0).
        return None
    for pop, paf in subpop_afs.items():
        if paf is None:
            # A missing per-population AF cannot be compared; skip it.
            continue
        if paf > _FOUNDER_MIN_SUBPOP_AF and paf / global_af > _FOUNDER_ENRICHMENT_RATIO:
            return (f"Possible founder variant: AF in '{pop}' ({paf:.5f}) is "
                    f"{paf/global_af:.0f}x global AF. Use ancestry-matched AF.")
    return None


def fetch_af(chrom, pos, ref, alt, ancestry=None):
    """Look up a variant's allele frequency and classify it.

    The variant id sent to the lookup is "{chrom}-{pos}-{ref}-{alt}".

    Args:
        chrom, pos, ref, alt: Components of the variant id.
        ancestry: Currently unused; accepted so callers can already pass it.

    Returns:
        An AFResult. Its state is AF_UNKNOWN when the query raised, the
        variant was not found, or the record has no AF; AF_ZERO or
        AF_UNCERTAIN when the AF is 0 (depending on coverage); AF_NUMERIC
        otherwise. Query failures never propagate; they are recorded in
        ``flags`` instead.
    """
    vid = f"{chrom}-{pos}-{ref}-{alt}"
    flags = []
    try:
        data = _query_gnomad(vid)
    except Exception as exc:
        # Deliberate best-effort: any lookup failure degrades to AF_UNKNOWN.
        flags.append(f"gnomAD query failed: {exc}")
        return _unknown_af_result(flags)

    if data is None:
        flags.append("Variant absent from gnomAD — treated as AF_UNKNOWN.")
        return _unknown_af_result(flags)

    global_af = data.get("af")
    # "or {}" also covers a record whose "populations" value is None.
    subpop_afs = data.get("populations") or {}
    coverage_ok = data.get("coverage_ok", True)

    state = _classify_af_state(global_af, coverage_ok, flags)

    strat_warn = _founder_warning(global_af, subpop_afs)
    founder_flag = strat_warn is not None
    if founder_flag:
        flags.append(strat_warn)

    if state == AF_NUMERIC:
        is_rare = global_af < AF_RARITY_THRESHOLD
    elif state == AF_ZERO:
        is_rare = True
    else:
        # Rarity cannot be decided for unknown or uncertain frequencies.
        is_rare = None

    return AFResult(state, global_af, subpop_afs, is_rare,
                    founder_flag, strat_warn, GNOMAD_VERSION, flags)
75
+
76
_GNOMAD_API_URL = "https://gnomad.broadinstitute.org/api"

# The single GraphQL document sent by _query_gnomad. It requests only the
# genome allele frequency and the per-population genome allele frequencies.
_GQL = (
    "query V($variantId:String!,$datasetId:DatasetId!)"
    "{variant(variantId:$variantId,dataset:$datasetId)"
    "{genome{af populations{id af}}}}"
)


def _query_gnomad(vid, *, dataset="gnomad_r4", timeout=15):
    """Fetch genome allele frequencies for one variant from the gnomAD API.

    Args:
        vid: Variant id passed as the query's ``variantId`` variable.
        dataset: Dataset id passed as the query's ``datasetId`` variable.
        timeout: Socket timeout in seconds for the HTTP request.

    Returns:
        None when the response contains no variant record; otherwise a dict
        with keys "af" (genome AF, possibly None), "populations" (lower-cased
        population id -> AF, entries with a null AF dropped) and
        "coverage_ok".

    Raises:
        RuntimeError: The API reported errors and returned no data.
        urllib.error.URLError: The HTTP request failed.
        json.JSONDecodeError: The response body was not valid JSON.
    """
    payload = json.dumps({
        "query": _GQL,
        "variables": {"variantId": vid, "datasetId": dataset},
    }).encode()
    req = urllib.request.Request(
        _GNOMAD_API_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = json.loads(resp.read())

    data = body.get("data")
    if not data:
        # "data" may be missing or JSON null when the request itself failed;
        # surface that as an error instead of reporting the variant absent.
        errors = body.get("errors")
        if errors:
            detail = "; ".join(
                str(err.get("message", err)) if isinstance(err, dict) else str(err)
                for err in errors
            )
            raise RuntimeError(f"gnomAD API error: {detail}")
        return None

    vdata = data.get("variant")
    if not vdata:
        return None

    genome = vdata.get("genome") or {}
    pops = {
        p["id"].lower(): p["af"]
        for p in (genome.get("populations") or [])
        if p.get("af") is not None
    }
    # coverage_ok is hard-coded: the query above does not request coverage.
    return {"af": genome.get("af"), "populations": pops, "coverage_ok": True}
100
+
101
def format_af_display(af_result):
    """Render an AF result's frequency as a short display string.

    Numeric results are shown with six decimal places; the zero, uncertain
    and fallback states each map to a fixed label.
    """
    tag = af_result.state
    if tag == AF_ZERO:
        label = "0 (confirmed absent)"
    elif tag == AF_UNCERTAIN:
        label = "0 (⚠ AF_UNCERTAIN — coverage insufficient)"
    elif tag == AF_NUMERIC:
        label = format(af_result.global_af, ".6f")
    else:
        # Any other state is presented as missing from gnomAD.
        label = "Not in gnomAD (AF_UNKNOWN)"
    return label