File size: 6,492 Bytes
1656907
 
 
 
 
 
 
 
3e219fa
 
 
 
 
1656907
3e219fa
 
 
 
 
 
30668cd
3e219fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30668cd
1656907
 
 
 
 
30668cd
3e219fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30668cd
3e219fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30668cd
 
 
 
 
 
1656907
 
 
 
 
3e219fa
 
1656907
 
 
 
 
 
 
 
 
 
30668cd
 
 
3e219fa
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
"""Run VariantLens concordance validation and write an auditable JSON report.

Validation mode (`--validation`) flips the engine into the more permissive
ClinGen Bayesian + PP5/BP6-enabled configuration that ClinVar curators
effectively use. The clinical default (strict Table 5, deprecated
PP5/BP6 off) is correct for production but isn't directly comparable to
ClinVar's expert-panel calls.
"""
from __future__ import annotations

import argparse
import asyncio
import json
import os
import time
from collections import Counter
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

# Default input fixture and report destination (relative paths).
DEFAULT_FIXTURE = Path("backend/tests/fixtures/clinvar_validation_set.json")
DEFAULT_OUT = Path("docs/clinical_validation_results.json")

# Minimum concordance a run must reach to be reported as passing.
TARGET_CONCORDANCE = 0.85

# When True, a call that lands on the neighbouring tier of the same side
# (e.g. Likely Pathogenic for an expected Pathogenic) still counts as a match.
ALLOW_ADJACENT = True

# Labels that are interchangeable with each other when ALLOW_ADJACENT is on.
# The order here fixes the key order of PARTITION.
_ADJACENT_GROUPS = (
    ("Pathogenic", "Likely Pathogenic"),
    ("Uncertain Significance",),
    ("Likely Benign", "Benign"),
)

# Maps each expected label to the set of engine calls accepted as a match.
PARTITION = {
    label: (set(group) if ALLOW_ADJACENT else {label})
    for group in _ADJACENT_GROUPS
    for label in group
}


def _expected_to_canonical(value: str) -> str:
    table = {
        "Pathogenic": "Pathogenic",
        "Likely pathogenic": "Likely Pathogenic",
        "Likely Pathogenic": "Likely Pathogenic",
        "Uncertain significance": "Uncertain Significance",
        "Uncertain Significance": "Uncertain Significance",
        "Likely benign": "Likely Benign",
        "Likely Benign": "Likely Benign",
        "Benign": "Benign",
    }
    return table.get((value or "").strip(), value)


def _extract_hgvs(title: str) -> str | None:
    if "(" not in title or ":" not in title:
        return None
    transcript = title.split("(", 1)[0].strip()
    coding = title.split(":", 1)[1].split(" ", 1)[0].rstrip(",")
    return f"{transcript}:{coding}"


async def run_validation(limit: int | None, skip_rag: bool, fixture: Path) -> dict[str, Any]:
    """Run fixture variants through the pipeline and score the resulting calls.

    Rows are skipped (not scored, not reported) when no HGVS string can be
    extracted from their ``title`` or when their canonicalised
    ``expected_classification`` is not a key of ``PARTITION``. A row whose
    pipeline call raises is reported with ``got="ERROR"`` and counted under
    ``errors``, but is left out of the concordance denominator.

    Args:
        limit: Only the first ``limit`` fixture rows are considered
            (``rows[:limit]``); ``None`` considers all rows.
        skip_rag: Forwarded unchanged to ``pipeline.run``.
        fixture: Path to a JSON array of row objects. The keys read here are
            ``title``, ``expected_classification``, ``gene`` and
            ``variation_id``.

    Returns:
        A report dict with run metadata (``generated_at``, ``fixture``,
        ``skip_rag``, ``target_concordance``, ``elapsed_seconds``), the
        scores (``total_scored``, ``correct``, ``errors``, ``concordance``,
        ``passed_target``), an ``"expected -> got"`` ``confusion`` tally and
        the per-variant ``results`` list.
    """
    # Imported here so any env override applied in main() (e.g. --validation)
    # is read before Settings is constructed.
    from backend.app.api.pipeline import VariantPipeline
    from backend.app.schemas.variant import VariantInput

    # Read as UTF-8 explicitly rather than relying on the locale default.
    rows = json.loads(fixture.read_text(encoding="utf-8"))
    pipeline = VariantPipeline()
    results: list[dict[str, Any]] = []
    confusion: Counter[str] = Counter()
    correct = 0
    total = 0
    errors = 0
    # perf_counter is monotonic, so durations are immune to wall-clock jumps.
    started = time.perf_counter()

    # index is the row's 1-based position in the fixture, skipped rows included.
    for index, row in enumerate(rows[:limit], start=1):
        hgvs = _extract_hgvs(row.get("title", ""))
        expected = _expected_to_canonical(row.get("expected_classification", ""))
        if not hgvs or expected not in PARTITION:
            continue

        row_started = time.perf_counter()
        try:
            result = await pipeline.run(
                VariantInput(raw=hgvs, gene_symbol=row.get("gene")),
                skip_rag=skip_rag,
            )
            got: str = result.classification.significance
            rationale = result.classification.rationale
            criteria = [
                {
                    "code": c.code,
                    "triggered": c.triggered,
                    "strength": c.strength,
                    "source": c.source,
                    "evidence_text": c.evidence_text,
                    "confidence": c.confidence,
                    "pmid": c.pmid,
                    "caveat": c.caveat,
                }
                for c in result.evidence.criteria
            ]
            error = None
        except Exception as exc:
            # One failing variant must not abort the run; record it instead.
            got = "ERROR"
            rationale = None
            criteria = []
            error = str(exc)

        elapsed = round(time.perf_counter() - row_started, 3)
        match = got in PARTITION.get(expected, set())
        if got == "ERROR":
            errors += 1
        else:
            # Only successful calls enter the concordance and confusion tallies.
            total += 1
            if match:
                correct += 1
            confusion[f"{expected} -> {got}"] += 1

        print(f"{index:03d} {hgvs} expected={expected} got={got} match={match} elapsed={elapsed}s")
        results.append({
            "variation_id": row.get("variation_id"),
            "gene": row.get("gene"),
            "hgvs": hgvs,
            "expected": expected,
            "got": got,
            "match": match,
            "elapsed_seconds": elapsed,
            "rationale": rationale,
            "criteria": criteria,
            "error": error,
        })

    # With nothing scored, report 0.0 rather than dividing by zero.
    concordance = correct / total if total else 0.0
    return {
        "generated_at": datetime.now(UTC).isoformat(),
        "fixture": str(fixture),
        "skip_rag": skip_rag,
        "target_concordance": TARGET_CONCORDANCE,
        "total_scored": total,
        "correct": correct,
        "errors": errors,
        "concordance": concordance,
        "passed_target": concordance >= TARGET_CONCORDANCE,
        "elapsed_seconds": round(time.perf_counter() - started, 3),
        "confusion": dict(confusion),
        "results": results,
    }


def main() -> int:
    """Parse CLI arguments, run the validation and write the JSON report.

    With ``--validation`` the ``ACMG_COMBINER_STRATEGY`` and
    ``ENABLE_DEPRECATED_CLINVAR_CRITERIA`` environment variables are set
    before the pipeline is imported, and the cached settings are cleared on
    a best-effort basis.

    Returns:
        Process exit status: 0 when the report's ``passed_target`` is true,
        1 otherwise.
    """
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--limit", type=int)
    parser.add_argument("--skip-rag", action="store_true")
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT)
    parser.add_argument(
        "--fixture",
        type=Path,
        default=DEFAULT_FIXTURE,
        help="Path to the ClinVar fixture JSON (default: 100-variant set).",
    )
    parser.add_argument(
        "--validation",
        action="store_true",
        help="Use validation-mode config (Bayesian combiner + PP5/BP6 enabled).",
    )
    args = parser.parse_args()

    if args.validation:
        os.environ["ACMG_COMBINER_STRATEGY"] = "bayesian"
        os.environ["ENABLE_DEPRECATED_CLINVAR_CRITERIA"] = "true"
        # Settings is lru_cached — clear so subsequent imports see the override
        try:
            from backend.app.config import get_settings
            get_settings.cache_clear()
        except Exception as exc:
            # Still best-effort, but say so: a stale settings cache would mean
            # the run silently ignores the validation-mode overrides.
            print(
                f"warning: could not clear cached settings ({exc!r}); "
                "validation-mode overrides may not take effect",
                file=sys.stderr,
            )

    report = asyncio.run(run_validation(
        limit=args.limit, skip_rag=args.skip_rag, fixture=args.fixture,
    ))
    args.out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the report bytes independent of the locale.
    args.out.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
    print(
        f"Concordance: {report['correct']}/{report['total_scored']} = "
        f"{report['concordance']:.1%}; wrote {args.out}"
    )
    return 0 if report["passed_target"] else 1


# Script entry point: main()'s return value becomes the process exit status
# (0 when the concordance target was met, 1 otherwise).
if __name__ == "__main__":
    raise SystemExit(main())