varientlens / backend /tests /test_rules_engine.py
Codex
ACMG rule fixes: PP2 implementation, BP4 AM-concordance, PVS1 consequence-gate
e5293e2
from backend.app.schemas.evidence import (
ACMGCriterion,
AutoPVS1Result,
AutoPVS1Step,
ClinVarResidueNeighbor,
ClinVarSubmission,
EvidenceBundle,
InSilicoResult,
PopulationFrequency,
)
from backend.app.services.acmg.rules import RuleEngine
def test_ba1_triggers_above_5pct() -> None:
    """An overall allele frequency of 6% must yield the BA1 criterion."""
    engine = RuleEngine()
    frequency = PopulationFrequency(overall_af=0.06)
    fired = {crit.code for crit in engine.score_population(frequency)}
    assert "BA1" in fired
def test_pm2_triggers_below_threshold() -> None:
    """An overall allele frequency of 0.00001 must yield the PM2 criterion."""
    engine = RuleEngine()
    frequency = PopulationFrequency(overall_af=0.00001)
    fired = {crit.code for crit in engine.score_population(frequency)}
    assert "PM2" in fired
def test_pm2_fires_supporting_by_default() -> None:
    """PM2 defaults to SUPPORTING strength (ClinGen SVI 2020).

    Promotion to MODERATE is left to the score_all post-process step and only
    happens when corroborating pathogenic evidence is present (see
    test_pm2_upgraded_when_strong_path_support).
    """
    engine = RuleEngine()
    scored = engine.score_population(PopulationFrequency(overall_af=0.00001))
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_pm2_upgraded_when_strong_path_support() -> None:
    """PM2 is promoted to MODERATE when strong pathogenic evidence co-fires.

    With PVS1 (or PP5_strong, or PS1, etc.) also firing, PM2 should upgrade
    so the Bayesian total crosses the LP threshold. This was the root cause
    of the 80 pathogenic→VUS misses on the 1000-variant fixture:
    PM2 + PP5_strong = +5 Bayesian points, just below the +6 LP threshold.
    """
    engine = RuleEngine()
    rare_frequency = PopulationFrequency(overall_af=0.00001)
    evidence = EvidenceBundle(population_frequency=rare_frequency, autopvs1=_autopvs1())
    scored = engine.score_all(evidence, gene_symbol="BRCA1", consequence="stop_gained")
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "moderate"
    # The evidence text may be absent; treat that the same as an empty note.
    note = (pm2.evidence_text or "").lower()
    assert "upgraded" in note
def test_pm2_stays_supporting_when_no_path_support() -> None:
    """PM2 must NOT be upgraded when no other pathogenic criterion fires.

    Upgrading a lone rare variant would over-classify borderline-LB variants
    to VUS.
    """
    engine = RuleEngine()
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        # Benign in-silico call only: nothing pathogenic accompanies PM2.
        insilico=InSilicoResult(revel=0.05, bp4_triggered=True),
    )
    pm2 = next(crit for crit in engine.score_all(evidence) if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_pm2_stays_supporting_when_only_weak_path_support() -> None:
    """Weak pathogenic evidence alone (PP3 supporting) must not upgrade PM2.

    This avoids stacking two weak criteria into a moderate.
    """
    engine = RuleEngine()
    weak_insilico = InSilicoResult(revel=0.5, alphamissense=0.7, pp3_triggered=True)
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        insilico=weak_insilico,
    )
    pm2 = next(crit for crit in engine.score_all(evidence) if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_bs2_triggers_with_homozygotes() -> None:
    """An overall AF of 0.001 with 5 observed homozygotes must yield BS2."""
    engine = RuleEngine()
    frequency = PopulationFrequency(overall_af=0.001, homozygote_count=5)
    fired = {crit.code for crit in engine.score_population(frequency)}
    assert "BS2" in fired
def test_pp3_triggers_when_concordant_pathogenic() -> None:
    """An in-silico result flagged pp3_triggered must yield PP3.

    The fixture uses REVEL 0.9, AlphaMissense 0.8 and SpliceAI 0.6.
    """
    engine = RuleEngine()
    predictions = InSilicoResult(
        revel=0.9,
        alphamissense=0.8,
        spliceai_max=0.6,
        pp3_triggered=True,
    )
    fired = {crit.code for crit in engine.score_insilico(predictions)}
    assert "PP3" in fired
def _autopvs1(triggered: bool = True, caveats: list[str] | None = None) -> AutoPVS1Result:
    """Build an AutoPVS1Result fixture with a single passing "Variant type" step.

    ``triggered`` sets both the flag and the conclusion text; ``caveats``
    falls back to an empty list when omitted (or otherwise falsy).
    """
    # "pass" is a reserved word, so the step fields are supplied via a mapping.
    step_fields = {"step": 1, "label": "Variant type", "value": "Stop-gained", "pass": True}
    if triggered:
        conclusion = "PVS1 triggered"
    else:
        conclusion = "PVS1 not triggered"
    return AutoPVS1Result(
        triggered=triggered,
        strength="very_strong",
        rule="PVS1",
        reasoning=[AutoPVS1Step(**step_fields)],
        conclusion=conclusion,
        caveats=caveats if caveats else [],
    )
def test_pvs1_propagates_caveats() -> None:
    """Caveats on the AutoPVS1 result must surface on the scored PVS1 criterion."""
    engine = RuleEngine()
    fixture = _autopvs1(caveats=["3' end exception applies"])
    pvs1 = engine.score_pvs1(fixture)
    assert pvs1 is not None
    caveat_text = pvs1.caveat or ""
    assert "3' end" in caveat_text
def test_score_all_aggregates() -> None:
    """score_all must combine PVS1, PM2 and PP3 from one evidence bundle.

    PP5 must not be emitted even though the bundle carries a 3-star
    Pathogenic ClinVar submission.
    """
    engine = RuleEngine()
    submission = ClinVarSubmission(
        accession="SCV0001",
        submitter="Invitae",
        classification="Pathogenic",
        stars=3,
        date="2024-01",
        condition="Hereditary cancer",
    )
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        insilico=InSilicoResult(revel=0.95, pp3_triggered=True),
        autopvs1=_autopvs1(),
        clinvar_existing=[submission],
    )
    fired = {crit.code for crit in engine.score_all(evidence)}
    assert fired >= {"PVS1", "PM2", "PP3"}
    assert "PP5" not in fired
def test_missing_population_frequency_does_not_trigger_pm2() -> None:
    """Scoring with no population data at all must not produce PM2."""
    engine = RuleEngine()
    fired = [crit.code for crit in engine.score_population(None)]
    assert "PM2" not in fired
# --- Gene-mechanism gating ---------------------------------------------------
# These tests cover the discordances on the 100-variant ClinVar 4★ fixture
# where PIK3CD and MTOR variants were misclassified because in-silico
# predictors (BP4) and PVS1 ignore the gene's established disease mechanism.
def test_pvs1_suppressed_for_gof_gene_mtor() -> None:
    """PVS1 must be withheld for MTOR, whose disease mechanism is gain-of-function.

    Letting a null variant trigger PVS1 here caused VUS → LP misclassifications.
    """
    outcome = RuleEngine().score_pvs1(_autopvs1(), gene_symbol="MTOR")
    assert outcome is None
def test_pvs1_suppressed_for_gof_gene_pik3cd() -> None:
    """PVS1 must be withheld for PIK3CD even when AutoPVS1 reports a trigger."""
    outcome = RuleEngine().score_pvs1(_autopvs1(), gene_symbol="PIK3CD")
    assert outcome is None
def test_pvs1_fires_for_lof_gene() -> None:
    """RPE65 disease is biallelic LoF, so PVS1 must fire at very_strong as usual."""
    engine = RuleEngine()
    pvs1 = engine.score_pvs1(_autopvs1(), gene_symbol="RPE65")
    assert pvs1 is not None
    assert pvs1.strength == "very_strong"
def test_pvs1_fires_when_no_gene_provided() -> None:
    """Backward compatibility: gene_symbol is optional and PVS1 still fires without it."""
    pvs1 = RuleEngine().score_pvs1(_autopvs1())
    assert pvs1 is not None
def test_bp4_capped_at_supporting_for_pik3cd() -> None:
    """BP4 is capped at supporting for PIK3CD and carries a gene-specific caveat.

    PIK3CD is GOF: REVEL/AlphaMissense systematically under-call activating
    missense as benign, so BP4 is capped per ClinGen Immune Disorders VCEP.
    This is the fix for 9 of 11 discordances on the validation set.
    """
    engine = RuleEngine()
    # Scores in the strong tier, which would normally fire BP4_strong.
    predictions = InSilicoResult(revel=0.01, alphamissense=0.05, bp4_triggered=True)
    scored = engine.score_insilico(predictions, gene_symbol="PIK3CD")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
    assert bp4.caveat is not None
    assert "PIK3CD" in bp4.caveat
def test_bp4_capped_at_supporting_for_pkd2() -> None:
    """BP4 is capped at supporting for PKD2 and the caveat names the gene.

    PKD2 N-terminus missense over-fires BP4 from REVEL/AM, while clinical
    curation keeps these as VUS, not LB. Added 2026-05-12 after lab-panel
    validation showed all 9 PKD2 misses share this pattern (variants in
    codons 1-43 with PM2_supporting + BP4_moderate pulling them past the
    -1 Bayesian LB boundary).
    """
    engine = RuleEngine()
    predictions = InSilicoResult(revel=0.10, alphamissense=0.15, bp4_triggered=True)
    scored = engine.score_insilico(predictions, gene_symbol="PKD2")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
    caveat_text = bp4.caveat or ""
    assert "PKD2" in caveat_text
def test_bp4_strong_for_lof_gene_uncapped() -> None:
    """For LoF genes the predictors are well-calibrated, so BP4_strong may fire.

    RPE65 is used as the LoF gene; no caveat is expected on the criterion.
    """
    engine = RuleEngine()
    predictions = InSilicoResult(revel=0.01, alphamissense=0.05, bp4_triggered=True)
    scored = engine.score_insilico(predictions, gene_symbol="RPE65")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "strong"
    assert bp4.caveat is None
def test_bp4_moderate_requires_alphamissense_concordance() -> None:
    """A single predictor must not be enough to push BP4 to moderate.

    Pejaver 2022 calibration. Previous logic used
    `revel <= 0.290 OR am <= 0.099`, which was too lenient and contributed
    to over-calling LB.
    """
    engine = RuleEngine()
    # REVEL is borderline-low but AM is clearly pathogenic: discordant input,
    # so the criterion must stay at supporting.
    discordant = InSilicoResult(revel=0.18, alphamissense=0.6, bp4_triggered=True)
    scored = engine.score_insilico(discordant, gene_symbol="RPE65")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
def test_bp4_strength_tiers_pejaver_calibrated() -> None:
    """Spot-check one representative input per BP4 strength tier."""
    engine = RuleEngine()
    tiers = (
        # Strong: REVEL ≤ 0.016 with AM also low
        (0.01, 0.05, "strong"),
        # Moderate: REVEL ≤ 0.183, AM concordant
        (0.15, 0.2, "moderate"),
        # Supporting: REVEL between 0.183 and 0.290 (the BP4 trigger ceiling)
        (0.25, 0.2, "supporting"),
    )
    for revel, alphamissense, expected in tiers:
        predictions = InSilicoResult(
            revel=revel,
            alphamissense=alphamissense,
            bp4_triggered=True,
        )
        assert engine._bp4_strength(predictions) == expected
def test_bp4_moderate_requires_alphamissense_data() -> None:
    """Without AlphaMissense data BP4 must NOT escalate on REVEL alone.

    Escalating drove ~28 VUS→Likely-Benign misses on the 1000-variant fixture.
    """
    engine = RuleEngine()
    # Both REVEL values are ≤ 0.183, but with no AM score the strength must
    # stay at supporting.
    for revel in (0.06, 0.01):
        predictions = InSilicoResult(revel=revel, alphamissense=None, bp4_triggered=True)
        assert engine._bp4_strength(predictions) == "supporting"
# --- BP1 (missense in LoF-only gene) -----------------------------------------
def test_bp1_fires_on_missense_in_lof_gene() -> None:
    """A missense variant in BRCA1 must yield BP1 at supporting strength."""
    engine = RuleEngine()
    bp1 = engine.score_bp1(consequence="missense_variant", gene_symbol="BRCA1")
    assert bp1 is not None
    assert (bp1.code, bp1.strength) == ("BP1", "supporting")
def test_bp1_does_not_fire_on_lof_consequence() -> None:
    """BP1 must not fire for stop-gained or frameshift consequences."""
    engine = RuleEngine()
    lof_cases = (("stop_gained", "BRCA1"), ("frameshift_variant", "BRCA2"))
    for consequence, gene in lof_cases:
        assert engine.score_bp1(consequence=consequence, gene_symbol=gene) is None
def test_bp1_does_not_fire_on_gof_gene() -> None:
    """PIK3CD missense (GOF mechanism) must not receive BP1."""
    outcome = RuleEngine().score_bp1(consequence="missense_variant", gene_symbol="PIK3CD")
    assert outcome is None
def test_bp1_does_not_fire_on_unknown_gene() -> None:
    """Conservative default: BP1 fires only on explicitly LoF-cataloged genes."""
    engine = RuleEngine()
    for gene in ("UNKNOWN_GENE", None):
        assert engine.score_bp1(consequence="missense_variant", gene_symbol=gene) is None
# --- PP2 (missense in missense-mechanism gene) -------------------------------
def test_pp2_fires_on_missense_in_curated_gene() -> None:
    """MYH7 is a HCM-VCEP PP2-endorsed gene, so a missense variant must fire PP2.

    The criterion is expected at supporting strength with the gene named in
    its evidence text.
    """
    engine = RuleEngine()
    pp2 = engine.score_pp2(consequence="missense_variant", gene_symbol="MYH7")
    assert pp2 is not None
    assert pp2.code == "PP2"
    assert pp2.strength == "supporting"
    assert "MYH7" in pp2.evidence_text
def test_pp2_does_not_fire_on_lof_consequence() -> None:
    """PP2 must not fire for stop-gained, frameshift or synonymous consequences."""
    engine = RuleEngine()
    non_missense_cases = (
        ("stop_gained", "MYH7"),
        ("frameshift_variant", "FBN1"),
        ("synonymous_variant", "KCNQ1"),
    )
    for consequence, gene in non_missense_cases:
        assert engine.score_pp2(consequence=consequence, gene_symbol=gene) is None
def test_pp2_does_not_fire_on_unknown_gene() -> None:
    """Conservative default: PP2 fires only on explicitly curated genes."""
    engine = RuleEngine()
    for gene in ("UNKNOWN_GENE", None):
        assert engine.score_pp2(consequence="missense_variant", gene_symbol=gene) is None
def test_pp2_does_not_fire_on_lof_mechanism_gene() -> None:
    """BRCA1 missense should hit BP1, not PP2: BRCA1 isn't on the PP2 list."""
    outcome = RuleEngine().score_pp2(consequence="missense_variant", gene_symbol="BRCA1")
    assert outcome is None
def test_pp2_disallowed_by_vcep() -> None:
    """A VCEP-level PP2 disallow must suppress PP2 even for a PP2-listed gene.

    ENIGMA-BRCA explicitly disallows PP2, so the BRCA1 VCEP entry must carry
    the pp2_disallowed flag. The suppression itself is exercised with TP53,
    which (per the original test's notes) IS on the PP2 list and is also
    VCEP-covered.
    """
    from backend.app.services.acmg.vcep import lookup_vcep

    e = RuleEngine()
    vcep = lookup_vcep("BRCA1")
    assert vcep is not None and vcep.pp2_disallowed
    # Force-test the disallow path with a gene that IS on the PP2 list
    # but happens to also be VCEP-covered (TP53):
    tp53_vcep = lookup_vcep("TP53")
    assert tp53_vcep is not None and tp53_vcep.pp2_disallowed
    # The flags alone say nothing about the engine: confirm that scoring a
    # TP53 missense actually yields no PP2 criterion.
    assert e.score_pp2(consequence="missense_variant", gene_symbol="TP53") is None
# --- BP7 (synonymous, no splice impact) --------------------------------------
def test_bp7_fires_on_synonymous_with_low_spliceai() -> None:
    """A synonymous variant with SpliceAI 0.05 must yield BP7 at supporting."""
    engine = RuleEngine()
    low_splice = InSilicoResult(spliceai_max=0.05)
    bp7 = engine.score_bp7(consequence="synonymous_variant", ins=low_splice)
    assert bp7 is not None
    assert bp7.strength == "supporting"
def test_bp7_blocked_by_high_spliceai() -> None:
    """A synonymous change predicted to alter splicing must NOT trigger BP7."""
    engine = RuleEngine()
    high_splice = InSilicoResult(spliceai_max=0.45)
    outcome = engine.score_bp7(consequence="synonymous_variant", ins=high_splice)
    assert outcome is None
def test_bp7_fires_when_spliceai_unavailable_with_caveat() -> None:
    """With no in-silico data, BP7 still fires but carries a manual-review caveat.

    SpliceAI cache misses are common, so the criterion fires at supporting
    with a caveat rather than being dropped entirely.
    """
    engine = RuleEngine()
    bp7 = engine.score_bp7(consequence="synonymous_variant", ins=None)
    assert bp7 is not None
    caveat_text = bp7.caveat or ""
    assert "splice-impact manually" in caveat_text
def test_bp7_does_not_fire_on_missense() -> None:
    """BP7 must not fire for a missense variant, even with SpliceAI at 0.05."""
    engine = RuleEngine()
    low_splice = InSilicoResult(spliceai_max=0.05)
    outcome = engine.score_bp7(consequence="missense_variant", ins=low_splice)
    assert outcome is None
# --- score_all wires consequence through ------------------------------------
def test_score_all_threads_consequence_and_gene() -> None:
    """Regression: score_all must forward gene_symbol AND consequence.

    If either is not passed on to the criterion-specific scorers, BP1/BP7
    are silently dropped.
    """
    engine = RuleEngine()
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        insilico=InSilicoResult(spliceai_max=0.0),
    )
    scored = engine.score_all(evidence, gene_symbol="BRCA1", consequence="missense_variant")
    fired = {crit.code for crit in scored}
    assert "BP1" in fired
    assert "PM2" in fired
# --- PM1 (mutational hotspot / critical functional domain) -------------------
def test_pm1_fires_at_moderate_on_point_hotspot_tp53_r175() -> None:
    """TP53 R175 is one of the canonical IARC hotspots and must fire PM1 at moderate.

    The evidence text is expected to mention residue 175.
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "TP53",
        "hgvs_protein": "p.Arg175His",
        "consequence": "missense_variant",
    }
    pm1 = engine.score_pm1(**variant)
    assert pm1 is not None
    assert pm1.code == "PM1"
    assert pm1.strength == "moderate"
    assert "175" in pm1.evidence_text
def test_pm1_fires_on_kras_g12() -> None:
    """KRAS p.G12D must fire PM1 at moderate even when consequence is None."""
    engine = RuleEngine()
    pm1 = engine.score_pm1(gene_symbol="KRAS", hgvs_protein="p.G12D", consequence=None)
    assert pm1 is not None
    assert pm1.strength == "moderate"
def test_pm1_fires_at_supporting_on_domain_range() -> None:
    """A domain-range hit that is not a point hotspot fires PM1 at supporting.

    The residue used lies inside BRCA1's BRCT domain (1646-1859).
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "BRCA1",
        "hgvs_protein": "p.Trp1751Arg",
        "consequence": "missense_variant",
    }
    pm1 = engine.score_pm1(**variant)
    assert pm1 is not None
    assert pm1.strength == "supporting"
def test_pm1_does_not_fire_outside_hotspot() -> None:
    """A TP53 missense outside the DNA-binding domain (residue 50) must not fire PM1."""
    outcome = RuleEngine().score_pm1("TP53", "p.A50P", "missense_variant")
    assert outcome is None
def test_pm1_does_not_fire_on_unknown_gene() -> None:
    """PM1 must not fire for a gene symbol the engine does not recognise."""
    outcome = RuleEngine().score_pm1("UNKNOWN_GENE", "p.R100K", "missense_variant")
    assert outcome is None
def test_pm1_skipped_for_synonymous() -> None:
    """PM1 is for missense only.

    Synonymous variants don't change the protein and shouldn't pile evidence
    onto the hotspot residue.
    """
    outcome = RuleEngine().score_pm1("TP53", "p.Arg175=", "synonymous_variant")
    assert outcome is None
def test_pm1_handles_three_letter_and_one_letter_hgvs() -> None:
    """Residue parsing must work across HGVS protein notations.

    Covers three-letter (p.Arg175His), one-letter (p.R175H) and
    accession-prefixed parenthesised (NP_000537.3:p.(Arg175His)) forms.
    """
    engine = RuleEngine()
    notations = ("p.Arg175His", "p.R175H", "NP_000537.3:p.(Arg175His)")
    # Score every notation first, then check them, so all three calls run
    # before any assertion can fail.
    results = [engine.score_pm1("TP53", text, "missense_variant") for text in notations]
    for pm1 in results:
        assert pm1 is not None
        assert pm1.strength == "moderate"
def test_pm1_handles_missing_protein() -> None:
    """PM1 must return None when the protein HGVS is None or an empty string."""
    engine = RuleEngine()
    for missing in (None, ""):
        assert engine.score_pm1("TP53", missing, "missense_variant") is None
def test_score_all_threads_hgvs_protein_for_pm1() -> None:
    """score_all must thread hgvs_protein through to score_pm1."""
    engine = RuleEngine()
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        insilico=InSilicoResult(),
    )
    scored = engine.score_all(
        evidence,
        gene_symbol="TP53",
        consequence="missense_variant",
        hgvs_protein="p.R248Q",
    )
    fired = {crit.code for crit in scored}
    assert "PM1" in fired
    assert "PM2" in fired
# --- PS1 (same amino acid change as known pathogenic) -----------------------
def test_ps1_fires_on_exact_match_tp53_r175h() -> None:
    """An exact protein-change match to TP53 R175H must fire PS1 at strong.

    TP53 R175H is one of the most-curated pathogenic variants ever.
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "TP53",
        "hgvs_protein": "p.Arg175His",
        "consequence": "missense_variant",
    }
    ps1 = engine.score_ps1_pm5(**variant)
    assert ps1 is not None
    assert ps1.code == "PS1"
    assert ps1.strength == "strong"
def test_ps1_fires_on_one_letter_hgvs() -> None:
    """PS1 must also fire when the protein change uses one-letter notation (KRAS p.G12D)."""
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5("KRAS", "p.G12D", "missense_variant")
    assert ps1 is not None
    assert ps1.code == "PS1"
def test_ps1_fires_on_braf_v600e() -> None:
    """BRAF p.V600E must fire PS1 at strong strength."""
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5("BRAF", "p.V600E", "missense_variant")
    assert ps1 is not None
    assert (ps1.code, ps1.strength) == ("PS1", "strong")
# --- PM5 (different change at same residue) ---------------------------------
def test_pm5_fires_when_residue_known_but_alt_different() -> None:
    """A novel alt at a known pathogenic residue must fire PM5 at moderate.

    TP53 R175 has known pathogenic R175H/C/L; the input p.R175P carries a
    different alt.
    """
    engine = RuleEngine()
    pm5 = engine.score_ps1_pm5("TP53", "p.R175P", "missense_variant")
    assert pm5 is not None
    assert (pm5.code, pm5.strength) == ("PM5", "moderate")
def test_pm5_fires_on_braf_v600q() -> None:
    """V600 has known V600E/K/R/D, so V600Q should fire PM5, not PS1."""
    engine = RuleEngine()
    pm5 = engine.score_ps1_pm5("BRAF", "p.V600Q", "missense_variant")
    assert pm5 is not None
    assert pm5.code == "PM5"
# --- Negative tests ---------------------------------------------------------
def test_ps1_pm5_does_not_fire_outside_known_residues() -> None:
    """TP53 codon 50 has no known pathogenic entries, so neither PS1 nor PM5 fires."""
    outcome = RuleEngine().score_ps1_pm5("TP53", "p.A50P", "missense_variant")
    assert outcome is None
def test_ps1_pm5_does_not_fire_on_unknown_gene() -> None:
    """Neither PS1 nor PM5 may fire for a gene symbol the engine does not recognise."""
    outcome = RuleEngine().score_ps1_pm5("UNKNOWN_GENE", "p.R100K", "missense_variant")
    assert outcome is None
def test_ps1_pm5_skipped_for_synonymous() -> None:
    """A synonymous change at TP53 residue 175 must yield neither PS1 nor PM5."""
    outcome = RuleEngine().score_ps1_pm5("TP53", "p.Arg175=", "synonymous_variant")
    assert outcome is None
def test_ps1_pm5_skipped_for_stop_gain() -> None:
    """Stop-gain at a known pathogenic missense residue is PVS1's territory.

    PS1/PM5 are explicitly missense-scoped per Richards 2015, so neither may
    fire for p.R175*.
    """
    engine = RuleEngine()
    # The second case leaves the consequence unknown: the '*' alt alone
    # should still suppress both criteria.
    for consequence in ("stop_gained", None):
        assert engine.score_ps1_pm5("TP53", "p.R175*", consequence) is None
def test_ps1_takes_precedence_over_pm5() -> None:
    """An exact known-pathogenic match yields PS1 only, never PS1+PM5 together.

    This holds even when the residue has other known pathogenic variants.
    """
    engine = RuleEngine()
    outcome = engine.score_ps1_pm5("TP53", "p.R175H", "missense_variant")
    assert outcome is not None
    # PS1 wins over PM5 for an exact match.
    assert outcome.code == "PS1"
    # The scorer hands back a single ACMGCriterion rather than a list, which
    # confirms it never emits both criteria at once.
    assert isinstance(outcome, ACMGCriterion)
def test_score_all_threads_hgvs_protein_for_ps1() -> None:
    """score_all must forward hgvs_protein so that PS1 fires alongside PM2."""
    engine = RuleEngine()
    rare_frequency = PopulationFrequency(overall_af=0.00001)
    evidence = EvidenceBundle(population_frequency=rare_frequency)
    scored = engine.score_all(
        evidence,
        gene_symbol="TP53",
        consequence="missense_variant",
        hgvs_protein="p.R175H",
    )
    fired = {crit.code for crit in scored}
    assert "PS1" in fired
    assert "PM2" in fired
# --- Live ClinVar residue-neighbor evidence (PS1 / PM5) ----------------------
# These cover the runtime ClinVar same-residue lookup that supersedes the
# hand-curated table for any variant where the pipeline pre-fetched
# residue neighbors. The curated table remains the fallback for offline /
# NCBI-failed paths.
def _neighbor(residue: int, alt: str, accession: str = "VCV000012345", stars: int = 2,
              classification: str = "Pathogenic") -> ClinVarResidueNeighbor:
    """Build a ClinVarResidueNeighbor fixture for the given residue and alt.

    The protein change is synthesised as ``p.X<residue><alt>``; the accession,
    star rating and classification can be overridden per test.
    """
    protein_change = f"p.X{residue}{alt}"
    fields = {
        "accession": accession,
        "residue": residue,
        "alt_aa": alt,
        "protein_change": protein_change,
        "classification": classification,
        "stars": stars,
    }
    return ClinVarResidueNeighbor(**fields)
def test_ps1_fires_from_live_residue_neighbors() -> None:
    """A live ClinVar P entry with the same alt at the same residue fires PS1 strong.

    The criterion's source must quote the ClinVar accession (not the curated
    table).
    """
    engine = RuleEngine()
    live_hit = _neighbor(175, "H", accession="VCV000012345", stars=3)
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175H",
        consequence="missense_variant",
        residue_neighbors=[live_hit],
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    assert ps1.strength == "strong"
    assert "VCV000012345" in ps1.source
    assert "ClinVar" in ps1.source
def test_pm5_fires_from_live_residue_neighbors() -> None:
    """Live ClinVar has different alts at the same residue, so PM5 fires at moderate."""
    engine = RuleEngine()
    live_hits = [
        _neighbor(175, alt, accession=accession)
        for alt, accession in (("H", "VCV000001"), ("C", "VCV000002"))
    ]
    pm5 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        # p.R175P matches neither neighbor's alt.
        hgvs_protein="p.R175P",
        consequence="missense_variant",
        residue_neighbors=live_hits,
    )
    assert pm5 is not None
    assert pm5.code == "PM5"
    assert pm5.strength == "moderate"
    assert "ClinVar VCV000001" in pm5.source
def test_live_neighbors_supersede_curated_table_for_new_genes() -> None:
    """PS1 fires purely from live data for a gene absent from the curated table.

    Demonstrates that the runtime lookup catches variants the static
    known_pathogenic table doesn't cover.
    """
    engine = RuleEngine()
    live_hit = _neighbor(742, "F", accession="VCV000999", stars=2)
    ps1 = engine.score_ps1_pm5(
        gene_symbol="OBSCURE_GENE",
        hgvs_protein="p.L742F",
        consequence="missense_variant",
        residue_neighbors=[live_hit],
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    # The source should cite the VCV accession rather than a curated VCEP.
    assert "VCV000999" in ps1.source
def test_falls_back_to_curated_table_when_no_live_neighbors() -> None:
    """An empty live-neighbor list falls through to the curated table.

    TP53 R175H is in the curated table, so PS1 still fires, but with a
    curated source.
    """
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175H",
        consequence="missense_variant",
        residue_neighbors=[],
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    # The source is the curated entry, not ClinVar: a "ClinVar" mention is
    # only tolerated when "IARC" appears as well.
    source = ps1.source
    assert not ("ClinVar" in source and "IARC" not in source)
def test_neither_fires_when_residue_has_no_neighbors_and_not_in_curated_table() -> None:
    """No live neighbors and no curated entry means neither PS1 nor PM5 fires."""
    engine = RuleEngine()
    variant = {
        "gene_symbol": "OBSCURE_GENE",
        "hgvs_protein": "p.L742F",
        "consequence": "missense_variant",
    }
    outcome = engine.score_ps1_pm5(residue_neighbors=[], **variant)
    assert outcome is None
def test_live_neighbors_only_filtered_to_correct_residue_by_pipeline() -> None:
    """Sanity check: an exact-alt match among same-residue neighbors fires PS1.

    The pipeline pre-filters neighbors by residue, so the list passed here
    holds only residue-175 entries for a residue-175 variant.
    """
    engine = RuleEngine()
    # Stands in for what the pipeline would already have filtered to residue 175.
    live_hits = [_neighbor(175, alt) for alt in ("H", "C")]
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        # p.R175C is an exact match for the second neighbor.
        hgvs_protein="p.R175C",
        consequence="missense_variant",
        residue_neighbors=live_hits,
    )
    assert ps1 is not None
    assert ps1.code == "PS1"