File size: 4,144 Bytes
69a93da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""af_handler.py — PeVe v1.1"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
import urllib.request, json
from config import AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT, GNOMAD_VERSION

# Allele-frequency (AF) states stored in AFResult.state.
# AF_NUMERIC: gnomAD returned a non-zero global AF value.
AF_NUMERIC   = "AF_NUMERIC"
# AF_ZERO: global AF is exactly 0 and coverage was reported as adequate.
AF_ZERO      = "AF_ZERO"
# AF_UNKNOWN: the query failed, the variant was absent, or no AF value was returned.
AF_UNKNOWN   = "AF_UNKNOWN"
# AF_UNCERTAIN: global AF is 0 but coverage was reported as insufficient.
AF_UNCERTAIN = "AF_UNCERTAIN"

@dataclass
class AFResult:
    """Outcome of a gnomAD allele-frequency lookup for a single variant.

    Bundles the classified AF state with the raw frequencies and any
    human-readable flags collected while producing the result.
    """

    # One of the module-level AF_* state constants.
    state: str
    # Global allele frequency, or None when no value is available.
    global_af: Optional[float]
    # Mapping of population id -> allele frequency.
    subpop_afs: dict
    # True / False when rarity could be decided, None otherwise.
    is_rare: Optional[bool]
    # True when a population-enriched (possible founder) variant was detected.
    founder_variant_flag: bool
    # Warning text accompanying the founder flag, if any.
    stratification_warning: Optional[str]
    # gnomAD version label recorded with the result.
    gnomad_version: str
    # Free-text notes; each instance gets its own list.
    flags: list = field(default_factory=list)

    def satisfies_rarity(self):
        """Return whether this result supports treating the variant as rare.

        A confirmed-zero AF always qualifies; a numeric AF qualifies when it
        is below ``AF_RARITY_THRESHOLD``; every other state does not.
        """
        if self.state == AF_ZERO:
            return True
        has_numeric_af = self.state == AF_NUMERIC and self.global_af is not None
        if not has_numeric_af:
            return False
        return self.global_af < AF_RARITY_THRESHOLD

    def triggers_high_af_conflict(self):
        """Return whether a numeric AF exceeds ``AF_HIGH_CONFLICT``."""
        if self.state != AF_NUMERIC or self.global_af is None:
            return False
        return self.global_af > AF_HIGH_CONFLICT

def fetch_af(chrom, pos, ref, alt, ancestry=None):
    """Look up a variant in gnomAD and classify its allele frequency.

    The variant id is built as ``chrom-pos-ref-alt`` and passed to
    ``_query_gnomad``. Lookup problems are reported through the result
    rather than raised: a failed query and an absent variant both produce
    an ``AF_UNKNOWN`` result carrying an explanatory flag.

    Args:
        chrom: Chromosome component of the variant id.
        pos: Position component of the variant id.
        ref: Reference allele.
        alt: Alternate allele.
        ancestry: Accepted for interface compatibility; currently not used
            by this function.

    Returns:
        AFResult: classified state, global and per-population AFs, rarity
        verdict (``None`` when it cannot be decided), founder-variant flag
        with its warning text, the configured gnomAD version, and flags.
    """
    vid = f"{chrom}-{pos}-{ref}-{alt}"
    flags = []
    try:
        data = _query_gnomad(vid)
    except Exception as exc:
        # Deliberate best-effort boundary: any lookup failure degrades to
        # AF_UNKNOWN with the reason recorded, instead of propagating.
        flags.append(f"gnomAD query failed: {exc}")
        return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, flags)

    if data is None:
        flags.append("Variant absent from gnomAD — treated as AF_UNKNOWN.")
        return AFResult(AF_UNKNOWN, None, {}, None, False, None, GNOMAD_VERSION, flags)

    global_af = data.get("af")
    # `or {}` (not a .get default) so an explicit "populations": None in the
    # payload cannot crash the .items() loop below.
    subpop_afs = data.get("populations") or {}
    coverage_ok = data.get("coverage_ok", True)

    if global_af is None:
        state = AF_UNKNOWN
    elif global_af == 0.0:
        state = AF_ZERO if coverage_ok else AF_UNCERTAIN
        if not coverage_ok:
            flags.append("AF=0 but coverage insufficient — AF_UNCERTAIN, rarity NOT confirmed.")
    else:
        state = AF_NUMERIC

    founder_flag, strat_warn = False, None
    if global_af and global_af > 0:
        for pop, paf in subpop_afs.items():
            if paf is None:
                # A population without an AF value cannot be compared.
                continue
            # Flag the first population whose AF is above 0.005 and more
            # than 10x the global AF (paf > 0.005 already implies paf > 0).
            if paf > 0.005 and (paf / global_af) > 10:
                founder_flag = True
                strat_warn = (f"Possible founder variant: AF in '{pop}' ({paf:.5f}) is "
                              f"{paf/global_af:.0f}x global AF. Use ancestry-matched AF.")
                flags.append(strat_warn)
                break

    # Rarity is only decidable for a numeric or confirmed-zero AF.
    if state == AF_NUMERIC:
        is_rare = global_af < AF_RARITY_THRESHOLD
    elif state == AF_ZERO:
        is_rare = True
    else:
        is_rare = None

    return AFResult(state, global_af, subpop_afs, is_rare,
                    founder_flag, strat_warn, GNOMAD_VERSION, flags)

# GraphQL query text for a gnomAD variant lookup: requests the genome AF and
# per-population AFs, parameterised by the $vid and $ds variables.
# NOTE(review): the request that _query_gnomad actually sends is built from an
# inline query string, not from this constant — confirm whether it is still needed.
_GQL = """
query V($vid:String!,$ds:DatasetId!){variant(variantId:$vid,dataset:$ds){
  genome{af populations{id af}}}}
"""

def _query_gnomad(vid):
    """POST a GraphQL variant query to the gnomAD API and reduce the reply.

    Args:
        vid: Variant id string, sent as the ``variantId`` GraphQL variable.

    Returns:
        ``None`` when the response contains no variant. Otherwise a dict with
        ``"af"`` (the genome AF as returned, possibly ``None``),
        ``"populations"`` (lower-cased population id -> AF, with null-AF
        entries dropped) and ``"coverage_ok"``.

    Raises:
        RuntimeError: The response reports errors and carries no data object,
            i.e. the request failed rather than the variant being absent.
        Exception: Network, HTTP and JSON decoding errors from ``urllib`` and
            ``json`` propagate unchanged to the caller.
    """
    # Build the request body once; it carries the query text and its variables.
    payload = json.dumps({
        "query": "query V($variantId:String!,$datasetId:DatasetId!){variant(variantId:$variantId,dataset:$datasetId){genome{af populations{id af}}}}",
        "variables": {"variantId": vid, "datasetId": "gnomad_r4"}
    }).encode()
    req = urllib.request.Request(
        "https://gnomad.broadinstitute.org/api",
        data=payload, headers={"Content-Type":"application/json"}, method="POST"
    )
    with urllib.request.urlopen(req, timeout=15) as r:
        body = json.loads(r.read())

    data = body.get("data")
    if data is None:
        # Errors with no data mean the request itself failed; surface that
        # explicitly instead of reporting the variant as absent.
        errors = body.get("errors")
        if errors:
            raise RuntimeError(f"gnomAD API returned errors: {errors}")
        data = {}

    vdata = data.get("variant")
    if not vdata:
        return None
    genome = vdata.get("genome") or {}
    pops = {
        p["id"].lower(): p["af"]
        for p in (genome.get("populations") or [])
        if p.get("af") is not None
    }
    # NOTE(review): coverage is not part of the query, so coverage_ok is
    # always True here — confirm whether a real coverage check is intended.
    return {"af": genome.get("af"), "populations": pops, "coverage_ok": True}

def format_af_display(af_result):
    """Render an AF result as a short human-readable string.

    Args:
        af_result: Object exposing ``state`` and ``global_af`` (an AFResult).

    Returns:
        str: The AF to six decimal places for a numeric result, a fixed
        label for the confirmed-zero and uncertain states, and the
        "Not in gnomAD" label for anything else.
    """
    # Guard global_af as the AFResult predicates do: a numeric state without
    # a value would otherwise raise TypeError inside the format spec.
    if af_result.state == AF_NUMERIC and af_result.global_af is not None:
        return f"{af_result.global_af:.6f}"
    if af_result.state == AF_ZERO:
        return "0 (confirmed absent)"
    if af_result.state == AF_UNCERTAIN:
        return "0 (⚠ AF_UNCERTAIN — coverage insufficient)"
    return "Not in gnomAD (AF_UNKNOWN)"