Spaces:

theodabos
/

varientlens

Sleeping

varientlens / scripts /run_validation.py

Codex

Add 1000-variant ClinVar validation fixture + stratified seeding script

30668cd 16 days ago

6.49 kB

	"""Run VariantLens concordance validation and write an auditable JSON report.

	Validation mode (`--validation`) flips the engine into the more permissive
	ClinGen Bayesian + PP5/BP6-enabled configuration that ClinVar curators
	effectively use. The clinical default (strict Table 5, deprecated
	PP5/BP6 off) is correct for production but isn't directly comparable to
	ClinVar's expert-panel calls.
	"""
	from __future__ import annotations

	import argparse
	import asyncio
	import json
	import os
	import time
	from collections import Counter
	from datetime import UTC, datetime
	from pathlib import Path
	from typing import Any

	# Default fixture input and report output locations (relative paths).
	DEFAULT_FIXTURE = Path("backend/tests/fixtures/clinvar_validation_set.json")
	DEFAULT_OUT = Path("docs/clinical_validation_results.json")
	# Concordance threshold stored in the report as "target_concordance".
	TARGET_CONCORDANCE = 0.85
	# When true, a call in the same tier as the expected label counts as a
	# match (e.g. "Likely Pathogenic" for an expected "Pathogenic").
	ALLOW_ADJACENT = True

	# Expected canonical label -> set of engine calls accepted as concordant.
	# Each tuple below is one tier; with ALLOW_ADJACENT every member of a tier
	# accepts the whole tier, otherwise a label only accepts itself.
	PARTITION = {
	    label: (set(tier) if ALLOW_ADJACENT else {label})
	    for tier in (
	        ("Pathogenic", "Likely Pathogenic"),
	        ("Uncertain Significance",),
	        ("Likely Benign", "Benign"),
	    )
	    for label in tier
	}


	def _expected_to_canonical(value: str) -> str:
	table = {
	"Pathogenic": "Pathogenic",
	"Likely pathogenic": "Likely Pathogenic",
	"Likely Pathogenic": "Likely Pathogenic",
	"Uncertain significance": "Uncertain Significance",
	"Uncertain Significance": "Uncertain Significance",
	"Likely benign": "Likely Benign",
	"Likely Benign": "Likely Benign",
	"Benign": "Benign",
	}
	return table.get((value or "").strip(), value)


	def _extract_hgvs(title: str) -> str \| None:
	if "(" not in title or ":" not in title:
	return None
	transcript = title.split("(", 1)[0].strip()
	coding = title.split(":", 1)[1].split(" ", 1)[0].rstrip(",")
	return f"{transcript}:{coding}"


	async def run_validation(limit: int | None, skip_rag: bool, fixture: Path) -> dict[str, Any]:
	    """Run the pipeline over the fixture rows and build the concordance report.

	    Each row's ``title`` is converted to an HGVS string and its
	    ``expected_classification`` to a canonical label. Rows for which either
	    step fails are skipped (and counted). Every remaining row is run through
	    ``VariantPipeline.run``; a row whose run raises is recorded with
	    ``got == "ERROR"`` and is excluded from the concordance denominator.

	    Args:
	        limit: Maximum number of fixture rows to consider (``None`` for all).
	            The slice is applied before filtering, so skipped rows count
	            against the limit.
	        skip_rag: Forwarded to ``VariantPipeline.run`` as ``skip_rag``.
	        fixture: Path to the JSON fixture, read as UTF-8.

	    Returns:
	        A JSON-ready dict with run metadata, the scored/correct/error/skipped
	        counts, the concordance ratio, a confusion table keyed
	        ``"expected -> got"``, and one entry per processed row.
	    """
	    # Imported here so any env override applied in main() (e.g. --validation)
	    # is read before Settings is constructed.
	    from backend.app.api.pipeline import VariantPipeline
	    from backend.app.schemas.variant import VariantInput

	    rows = json.loads(fixture.read_text(encoding="utf-8"))
	    pipeline = VariantPipeline()
	    results: list[dict[str, Any]] = []
	    confusion: Counter[str] = Counter()
	    correct = 0
	    total = 0
	    errors = 0
	    skipped = 0
	    # perf_counter is monotonic, so elapsed times cannot go negative if the
	    # wall clock is adjusted mid-run.
	    started = time.perf_counter()

	    for index, row in enumerate(rows[:limit], start=1):
	        # "or ''" also covers an explicit JSON null title.
	        hgvs = _extract_hgvs(row.get("title") or "")
	        expected = _expected_to_canonical(row.get("expected_classification", ""))
	        if not hgvs or expected not in PARTITION:
	            # Unusable row: count it so the report shows how many rows were
	            # left out of the score.
	            skipped += 1
	            continue

	        row_started = time.perf_counter()
	        try:
	            result = await pipeline.run(
	                VariantInput(raw=hgvs, gene_symbol=row.get("gene")),
	                skip_rag=skip_rag,
	            )
	            got: str = result.classification.significance
	            rationale = result.classification.rationale
	            criteria = [
	                {
	                    "code": c.code,
	                    "triggered": c.triggered,
	                    "strength": c.strength,
	                    "source": c.source,
	                    "evidence_text": c.evidence_text,
	                    "confidence": c.confidence,
	                    "pmid": c.pmid,
	                    "caveat": c.caveat,
	                }
	                for c in result.evidence.criteria
	            ]
	            error = None
	        except Exception as exc:
	            # Per-row boundary: one failing variant must not abort the run.
	            got = "ERROR"
	            rationale = None
	            criteria = []
	            # Include the exception type; str(exc) alone can be empty.
	            error = f"{type(exc).__name__}: {exc}"

	        elapsed = round(time.perf_counter() - row_started, 3)
	        match = got in PARTITION.get(expected, set())
	        if got == "ERROR":
	            errors += 1
	        else:
	            total += 1
	            if match:
	                correct += 1
	        confusion[f"{expected} -> {got}"] += 1

	        print(f"{index:03d} {hgvs} expected={expected} got={got} match={match} elapsed={elapsed}s")
	        results.append({
	            "variation_id": row.get("variation_id"),
	            "gene": row.get("gene"),
	            "hgvs": hgvs,
	            "expected": expected,
	            "got": got,
	            "match": match,
	            "elapsed_seconds": elapsed,
	            "rationale": rationale,
	            "criteria": criteria,
	            "error": error,
	        })

	    # No scored rows means no evidence of concordance; report 0.0.
	    concordance = correct / total if total else 0.0
	    return {
	        "generated_at": datetime.now(UTC).isoformat(),
	        "fixture": str(fixture),
	        "skip_rag": skip_rag,
	        "target_concordance": TARGET_CONCORDANCE,
	        "total_scored": total,
	        "correct": correct,
	        "total_errors": errors,
	        "total_skipped": skipped,
	        "concordance": concordance,
	        "passed_target": concordance >= TARGET_CONCORDANCE,
	        "elapsed_seconds": round(time.perf_counter() - started, 3),
	        "confusion": dict(confusion),
	        "results": results,
	    }


	def main() -> int:
	    """Parse CLI arguments, run the validation and write the JSON report.

	    With ``--validation`` the ``ACMG_COMBINER_STRATEGY`` and
	    ``ENABLE_DEPRECATED_CLINVAR_CRITERIA`` environment variables are set
	    before the pipeline is imported, and the cached settings are cleared on
	    a best-effort basis.

	    Returns:
	        ``0`` when the report's ``passed_target`` flag is true, else ``1``.
	    """
	    import sys  # local import: only needed for the stderr warning below

	    parser = argparse.ArgumentParser()
	    parser.add_argument("--limit", type=int)
	    parser.add_argument("--skip-rag", action="store_true")
	    parser.add_argument("--out", type=Path, default=DEFAULT_OUT)
	    parser.add_argument(
	        "--fixture",
	        type=Path,
	        default=DEFAULT_FIXTURE,
	        help="Path to the ClinVar fixture JSON (default: 100-variant set).",
	    )
	    parser.add_argument(
	        "--validation",
	        action="store_true",
	        help="Use validation-mode config (Bayesian combiner + PP5/BP6 enabled).",
	    )
	    args = parser.parse_args()

	    # A negative limit would silently slice rows off the END of the fixture
	    # (rows[:-n]); reject it rather than score an unintended subset.
	    if args.limit is not None and args.limit < 0:
	        parser.error("--limit must be non-negative")

	    if args.validation:
	        os.environ["ACMG_COMBINER_STRATEGY"] = "bayesian"
	        os.environ["ENABLE_DEPRECATED_CLINVAR_CRITERIA"] = "true"
	        # Settings is lru_cached — clear so subsequent imports see the override
	        try:
	            from backend.app.config import get_settings
	            get_settings.cache_clear()
	        except Exception as exc:
	            # Still best effort, but say so: a stale settings cache would
	            # mean the overrides above are not applied.
	            print(
	                f"warning: could not clear cached settings "
	                f"({type(exc).__name__}: {exc})",
	                file=sys.stderr,
	            )

	    report = asyncio.run(run_validation(
	        limit=args.limit, skip_rag=args.skip_rag, fixture=args.fixture,
	    ))
	    args.out.parent.mkdir(parents=True, exist_ok=True)
	    args.out.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
	    print(
	        f"Concordance: {report['correct']}/{report['total_scored']} = "
	        f"{report['concordance']:.1%}; wrote {args.out}"
	    )
	    return 0 if report["passed_target"] else 1


	# Script entry point: run main() and use its return value as the process exit status.
	if __name__ == "__main__":
	raise SystemExit(main())