Spaces:
Sleeping
Sleeping
| from backend.app.schemas.evidence import ( | |
| ACMGCriterion, | |
| AutoPVS1Result, | |
| AutoPVS1Step, | |
| ClinVarResidueNeighbor, | |
| ClinVarSubmission, | |
| EvidenceBundle, | |
| InSilicoResult, | |
| PopulationFrequency, | |
| ) | |
| from backend.app.services.acmg.rules import RuleEngine | |
def test_ba1_triggers_above_5pct() -> None:
    """An overall allele frequency of 0.06 must yield the BA1 criterion."""
    engine = RuleEngine()
    common = PopulationFrequency(overall_af=0.06)
    scored = engine.score_population(common)
    assert "BA1" in [crit.code for crit in scored]
def test_pm2_triggers_below_threshold() -> None:
    """An overall allele frequency of 0.00001 must yield the PM2 criterion."""
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    scored = engine.score_population(rare)
    assert "PM2" in [crit.code for crit in scored]
def test_pm2_fires_supporting_by_default() -> None:
    """PM2 defaults to SUPPORTING strength (ClinGen SVI 2020).

    The upgrade to MODERATE happens in the score_all post-process step and
    only when corroborating pathogenic evidence is present (see
    test_pm2_upgraded_when_strong_path_support).
    """
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    scored = engine.score_population(rare)
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_pm2_upgraded_when_strong_path_support() -> None:
    """PM2 is upgraded to MODERATE when strong pathogenic evidence co-fires.

    With PVS1 (or PP5_strong, or PS1, etc.) also firing, the upgrade lets the
    Bayesian total cross the LP threshold. This was the root cause of the 80
    pathogenic→VUS misses on the 1000-variant fixture: PM2 + PP5_strong =
    +5 Bayesian points, just below the +6 LP threshold.
    """
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    evidence = EvidenceBundle(population_frequency=rare, autopvs1=_autopvs1())
    scored = engine.score_all(evidence, gene_symbol="BRCA1", consequence="stop_gained")
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "moderate"
    # evidence_text may be empty/None, so normalise before searching it.
    explanation = pm2.evidence_text or ""
    assert "upgraded" in explanation.lower()
def test_pm2_stays_supporting_when_no_path_support() -> None:
    """PM2 must NOT be upgraded when no other pathogenic criterion fires.

    Upgrading a rare variant on its own would over-classify
    borderline-LB variants to VUS.
    """
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    # Benign in-silico evidence only; nothing pathogenic in the bundle.
    benign_scores = InSilicoResult(revel=0.05, bp4_triggered=True)
    evidence = EvidenceBundle(population_frequency=rare, insilico=benign_scores)
    scored = engine.score_all(evidence)
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_pm2_stays_supporting_when_only_weak_path_support() -> None:
    """Weak pathogenic evidence alone (PP3 supporting) must not upgrade PM2.

    This avoids stacking two weak criteria into a moderate.
    """
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    weak_path_scores = InSilicoResult(revel=0.5, alphamissense=0.7, pp3_triggered=True)
    evidence = EvidenceBundle(population_frequency=rare, insilico=weak_path_scores)
    scored = engine.score_all(evidence)
    pm2 = next(crit for crit in scored if crit.code == "PM2")
    assert pm2.strength == "supporting"
def test_bs2_triggers_with_homozygotes() -> None:
    """An AF of 0.001 with 5 homozygotes must yield the BS2 criterion."""
    engine = RuleEngine()
    freq = PopulationFrequency(overall_af=0.001, homozygote_count=5)
    scored = engine.score_population(freq)
    assert "BS2" in [crit.code for crit in scored]
def test_pp3_triggers_when_concordant_pathogenic() -> None:
    """Concordantly high predictor scores with pp3_triggered set must yield PP3."""
    engine = RuleEngine()
    pathogenic_scores = InSilicoResult(
        revel=0.9,
        alphamissense=0.8,
        spliceai_max=0.6,
        pp3_triggered=True,
    )
    scored = engine.score_insilico(pathogenic_scores)
    assert "PP3" in [crit.code for crit in scored]
def _autopvs1(triggered: bool = True, caveats: list[str] | None = None) -> AutoPVS1Result:
    """Build a very-strong PVS1 AutoPVS1Result fixture with one reasoning step.

    Args:
        triggered: Value for the ``triggered`` field; also selects the
            conclusion text.
        caveats: Caveat strings to attach; a falsy value becomes ``[]``.
    """
    # "pass" is a reserved word, so it has to be supplied via dict unpacking.
    variant_type_step = AutoPVS1Step(
        step=1, label="Variant type", value="Stop-gained", **{"pass": True}
    )
    if triggered:
        conclusion = "PVS1 triggered"
    else:
        conclusion = "PVS1 not triggered"
    return AutoPVS1Result(
        triggered=triggered,
        strength="very_strong",
        rule="PVS1",
        reasoning=[variant_type_step],
        conclusion=conclusion,
        caveats=caveats or [],
    )
def test_pvs1_propagates_caveats() -> None:
    """A caveat on the AutoPVS1 result must surface on the scored PVS1 criterion."""
    engine = RuleEngine()
    pvs1_input = _autopvs1(caveats=["3' end exception applies"])
    crit = engine.score_pvs1(pvs1_input)
    assert crit is not None
    caveat_text = crit.caveat or ""
    assert "3' end" in caveat_text
def test_score_all_aggregates() -> None:
    """score_all returns PVS1, PM2 and PP3 for one bundle, and no PP5.

    The bundle also carries a 3-star Pathogenic ClinVar submission; the test
    pins that this does not produce a PP5 code.
    """
    engine = RuleEngine()
    submission = ClinVarSubmission(
        accession="SCV0001",
        submitter="Invitae",
        classification="Pathogenic",
        stars=3,
        date="2024-01",
        condition="Hereditary cancer",
    )
    evidence = EvidenceBundle(
        population_frequency=PopulationFrequency(overall_af=0.00001),
        insilico=InSilicoResult(revel=0.95, pp3_triggered=True),
        autopvs1=_autopvs1(),
        clinvar_existing=[submission],
    )
    codes = {crit.code for crit in engine.score_all(evidence)}
    assert codes >= {"PVS1", "PM2", "PP3"}
    assert "PP5" not in codes
def test_missing_population_frequency_does_not_trigger_pm2() -> None:
    """Scoring with no population data (None) must not produce PM2."""
    engine = RuleEngine()
    scored = engine.score_population(None)
    assert "PM2" not in [crit.code for crit in scored]
| # --- Gene-mechanism gating --------------------------------------------------- | |
| # These tests cover the discordances on the 100-variant ClinVar 4★ fixture | |
| # where PIK3CD and MTOR variants were misclassified because in-silico | |
| # predictors (BP4) and PVS1 ignore the gene's established disease mechanism. | |
def test_pvs1_suppressed_for_gof_gene_mtor() -> None:
    """PVS1 must not fire for MTOR, whose disease mechanism is gain-of-function.

    Letting a null variant trigger PVS1 here caused VUS → LP
    misclassifications.
    """
    engine = RuleEngine()
    pvs1_input = _autopvs1()
    assert engine.score_pvs1(pvs1_input, gene_symbol="MTOR") is None
def test_pvs1_suppressed_for_gof_gene_pik3cd() -> None:
    """PVS1 must not fire when the gene symbol is PIK3CD."""
    engine = RuleEngine()
    pvs1_input = _autopvs1()
    assert engine.score_pvs1(pvs1_input, gene_symbol="PIK3CD") is None
def test_pvs1_fires_for_lof_gene() -> None:
    """PVS1 fires at very_strong for RPE65, whose disease is biallelic LoF."""
    engine = RuleEngine()
    pvs1_input = _autopvs1()
    pvs1 = engine.score_pvs1(pvs1_input, gene_symbol="RPE65")
    assert pvs1 is not None
    assert pvs1.strength == "very_strong"
def test_pvs1_fires_when_no_gene_provided() -> None:
    """Backward compatibility: score_pvs1 still fires without a gene_symbol."""
    engine = RuleEngine()
    pvs1_input = _autopvs1()
    assert engine.score_pvs1(pvs1_input) is not None
def test_bp4_capped_at_supporting_for_pik3cd() -> None:
    """BP4 is capped at supporting for PIK3CD and carries a caveat naming the gene.

    PIK3CD is GOF — REVEL/AlphaMissense systematically under-call activating
    missense as benign, hence the cap per ClinGen Immune Disorders VCEP. This
    is the fix for 9 of 11 discordances on the validation set.
    """
    engine = RuleEngine()
    # Strong-tier scores that would otherwise fire BP4_strong.
    benign_scores = InSilicoResult(revel=0.01, alphamissense=0.05, bp4_triggered=True)
    scored = engine.score_insilico(benign_scores, gene_symbol="PIK3CD")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
    assert bp4.caveat is not None
    assert "PIK3CD" in bp4.caveat
def test_bp4_capped_at_supporting_for_pkd2() -> None:
    """BP4 is capped at supporting for PKD2 and its caveat names the gene.

    PKD2 N-terminus missense over-fires BP4 from REVEL/AM, while clinical
    curation keeps these as VUS, not LB. Added 2026-05-12 after lab-panel
    validation showed all 9 PKD2 misses share this pattern (variants in
    codons 1-43 with PM2_supporting + BP4_moderate pulling them past the
    -1 Bayesian LB boundary).
    """
    engine = RuleEngine()
    benign_scores = InSilicoResult(revel=0.10, alphamissense=0.15, bp4_triggered=True)
    scored = engine.score_insilico(benign_scores, gene_symbol="PKD2")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
    caveat_text = bp4.caveat or ""
    assert "PKD2" in caveat_text
def test_bp4_strong_for_lof_gene_uncapped() -> None:
    """For a LoF gene (RPE65) BP4 fires at strong with no caveat.

    The predictors are well-calibrated for LoF genes, so no cap applies.
    """
    engine = RuleEngine()
    benign_scores = InSilicoResult(revel=0.01, alphamissense=0.05, bp4_triggered=True)
    scored = engine.score_insilico(benign_scores, gene_symbol="RPE65")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "strong"
    assert bp4.caveat is None
def test_bp4_moderate_requires_alphamissense_concordance() -> None:
    """A single low predictor must not push BP4 to moderate (Pejaver 2022).

    The previous logic used `revel <= 0.290 OR am <= 0.099`, which was too
    lenient and contributed to over-calling LB.
    """
    engine = RuleEngine()
    # REVEL is borderline-low while AM is clearly pathogenic: discordant,
    # so the strength must stay at supporting.
    discordant_scores = InSilicoResult(revel=0.18, alphamissense=0.6, bp4_triggered=True)
    scored = engine.score_insilico(discordant_scores, gene_symbol="RPE65")
    bp4 = next(crit for crit in scored if crit.code == "BP4")
    assert bp4.strength == "supporting"
def test_bp4_strength_tiers_pejaver_calibrated() -> None:
    """Spot-check one input per BP4 strength tier via _bp4_strength."""
    engine = RuleEngine()

    # Strong tier: REVEL ≤ 0.016 and AM also low.
    strong_case = InSilicoResult(revel=0.01, alphamissense=0.05, bp4_triggered=True)
    assert engine._bp4_strength(strong_case) == "strong"

    # Moderate tier: REVEL ≤ 0.183 with a concordant AM score.
    moderate_case = InSilicoResult(revel=0.15, alphamissense=0.2, bp4_triggered=True)
    assert engine._bp4_strength(moderate_case) == "moderate"

    # Supporting tier: REVEL between 0.183 and 0.290 (the BP4 trigger ceiling).
    supporting_case = InSilicoResult(revel=0.25, alphamissense=0.2, bp4_triggered=True)
    assert engine._bp4_strength(supporting_case) == "supporting"
def test_bp4_moderate_requires_alphamissense_data() -> None:
    """Without AlphaMissense data, BP4 must stay at supporting on REVEL alone.

    Escalating on REVEL alone drove ~28 VUS→Likely-Benign misses on the
    1000-variant fixture.
    """
    engine = RuleEngine()

    # REVEL ≤ 0.183 but AM is absent: must not reach moderate.
    revel_low = InSilicoResult(revel=0.06, alphamissense=None, bp4_triggered=True)
    assert engine._bp4_strength(revel_low) == "supporting"

    # Even a very low REVEL (0.01) stays at supporting when AM is absent.
    revel_very_low = InSilicoResult(revel=0.01, alphamissense=None, bp4_triggered=True)
    assert engine._bp4_strength(revel_very_low) == "supporting"
| # --- BP1 (missense in LoF-only gene) ----------------------------------------- | |
def test_bp1_fires_on_missense_in_lof_gene() -> None:
    """A missense variant in BRCA1 must yield BP1 at supporting strength."""
    engine = RuleEngine()
    bp1 = engine.score_bp1(consequence="missense_variant", gene_symbol="BRCA1")
    assert bp1 is not None
    assert bp1.code == "BP1"
    assert bp1.strength == "supporting"
def test_bp1_does_not_fire_on_lof_consequence() -> None:
    """BP1 must not fire for stop-gained or frameshift consequences."""
    engine = RuleEngine()
    stop_gain = engine.score_bp1(consequence="stop_gained", gene_symbol="BRCA1")
    assert stop_gain is None
    frameshift = engine.score_bp1(consequence="frameshift_variant", gene_symbol="BRCA2")
    assert frameshift is None
def test_bp1_does_not_fire_on_gof_gene() -> None:
    """A PIK3CD missense (GOF mechanism) must not receive BP1."""
    engine = RuleEngine()
    bp1 = engine.score_bp1(consequence="missense_variant", gene_symbol="PIK3CD")
    assert bp1 is None
def test_bp1_does_not_fire_on_unknown_gene() -> None:
    """Conservative default: BP1 fires only for explicitly LoF-cataloged genes.

    Both an unrecognised symbol and a missing symbol (None) must yield nothing.
    """
    engine = RuleEngine()
    unknown = engine.score_bp1(consequence="missense_variant", gene_symbol="UNKNOWN_GENE")
    assert unknown is None
    missing = engine.score_bp1(consequence="missense_variant", gene_symbol=None)
    assert missing is None
| # --- PP2 (missense in missense-mechanism gene) ------------------------------- | |
def test_pp2_fires_on_missense_in_curated_gene() -> None:
    """A missense in MYH7 (an HCM-VCEP PP2-endorsed gene) must fire PP2.

    The criterion is expected at supporting strength with the gene named in
    its evidence text.
    """
    engine = RuleEngine()
    pp2 = engine.score_pp2(consequence="missense_variant", gene_symbol="MYH7")
    assert pp2 is not None
    assert pp2.code == "PP2"
    assert pp2.strength == "supporting"
    assert "MYH7" in pp2.evidence_text
def test_pp2_does_not_fire_on_lof_consequence() -> None:
    """PP2 must not fire for stop-gained, frameshift or synonymous consequences."""
    engine = RuleEngine()
    stop_gain = engine.score_pp2(consequence="stop_gained", gene_symbol="MYH7")
    assert stop_gain is None
    frameshift = engine.score_pp2(consequence="frameshift_variant", gene_symbol="FBN1")
    assert frameshift is None
    synonymous = engine.score_pp2(consequence="synonymous_variant", gene_symbol="KCNQ1")
    assert synonymous is None
def test_pp2_does_not_fire_on_unknown_gene() -> None:
    """Conservative default: PP2 fires only for explicitly curated genes.

    Both an unrecognised symbol and a missing symbol (None) must yield nothing.
    """
    engine = RuleEngine()
    unknown = engine.score_pp2(consequence="missense_variant", gene_symbol="UNKNOWN_GENE")
    assert unknown is None
    missing = engine.score_pp2(consequence="missense_variant", gene_symbol=None)
    assert missing is None
def test_pp2_does_not_fire_on_lof_mechanism_gene() -> None:
    """A BRCA1 missense should hit BP1, not PP2 — BRCA1 isn't on the PP2 list."""
    engine = RuleEngine()
    pp2 = engine.score_pp2(consequence="missense_variant", gene_symbol="BRCA1")
    assert pp2 is None
def test_pp2_disallowed_by_vcep() -> None:
    """The VCEP entries for BRCA1 and TP53 must both flag PP2 as disallowed.

    ENIGMA-BRCA explicitly disallows PP2, so even if BRCA1 were on the PP2
    list the VCEP override must suppress firing. TP53 covers the case of a
    gene that IS on the PP2 list but is also VCEP-covered.

    NOTE(review): this test only checks the ``pp2_disallowed`` flag returned
    by ``lookup_vcep``; it does not call a scorer, so the suppression itself
    is not exercised here. The previously unused ``RuleEngine`` instance was
    removed. Consider adding a scorer-level assertion once it is confirmed
    which method applies the VCEP override.
    """
    from backend.app.services.acmg.vcep import lookup_vcep

    brca1_vcep = lookup_vcep("BRCA1")
    assert brca1_vcep is not None and brca1_vcep.pp2_disallowed

    # TP53: on the PP2 list, but VCEP-covered with PP2 disallowed.
    tp53_vcep = lookup_vcep("TP53")
    assert tp53_vcep is not None and tp53_vcep.pp2_disallowed
| # --- BP7 (synonymous, no splice impact) -------------------------------------- | |
def test_bp7_fires_on_synonymous_with_low_spliceai() -> None:
    """A synonymous variant with SpliceAI max 0.05 must fire BP7 at supporting."""
    engine = RuleEngine()
    low_splice = InSilicoResult(spliceai_max=0.05)
    bp7 = engine.score_bp7(consequence="synonymous_variant", ins=low_splice)
    assert bp7 is not None
    assert bp7.strength == "supporting"
def test_bp7_blocked_by_high_spliceai() -> None:
    """A synonymous change predicted to alter splicing must NOT trigger BP7."""
    engine = RuleEngine()
    high_splice = InSilicoResult(spliceai_max=0.45)
    bp7 = engine.score_bp7(consequence="synonymous_variant", ins=high_splice)
    assert bp7 is None
def test_bp7_fires_when_spliceai_unavailable_with_caveat() -> None:
    """With no in-silico data, BP7 still fires but carries a manual-review caveat.

    SpliceAI cache misses are common, so the criterion is kept (with a
    caveat) rather than dropped entirely.
    """
    engine = RuleEngine()
    bp7 = engine.score_bp7(consequence="synonymous_variant", ins=None)
    assert bp7 is not None
    caveat_text = bp7.caveat or ""
    assert "splice-impact manually" in caveat_text
def test_bp7_does_not_fire_on_missense() -> None:
    """BP7 must not fire for a missense variant, even with low SpliceAI."""
    engine = RuleEngine()
    low_splice = InSilicoResult(spliceai_max=0.05)
    bp7 = engine.score_bp7(consequence="missense_variant", ins=low_splice)
    assert bp7 is None
| # --- score_all wires consequence through ------------------------------------ | |
def test_score_all_threads_consequence_and_gene() -> None:
    """Regression: score_all must forward gene_symbol AND consequence.

    If either is not passed to the criterion-specific scorers, BP1/BP7 are
    silently dropped.
    """
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    no_splice = InSilicoResult(spliceai_max=0.0)
    evidence = EvidenceBundle(population_frequency=rare, insilico=no_splice)
    scored = engine.score_all(evidence, gene_symbol="BRCA1", consequence="missense_variant")
    codes = {crit.code for crit in scored}
    assert "BP1" in codes
    assert "PM2" in codes
| # --- PM1 (mutational hotspot / critical functional domain) ------------------- | |
def test_pm1_fires_at_moderate_on_point_hotspot_tp53_r175() -> None:
    """TP53 R175, a canonical IARC hotspot, must fire PM1 at moderate.

    The evidence text is expected to mention residue 175.
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "TP53",
        "hgvs_protein": "p.Arg175His",
        "consequence": "missense_variant",
    }
    pm1 = engine.score_pm1(**variant)
    assert pm1 is not None
    assert pm1.code == "PM1"
    assert pm1.strength == "moderate"
    assert "175" in pm1.evidence_text
def test_pm1_fires_on_kras_g12() -> None:
    """KRAS p.G12D must fire PM1 at moderate even with consequence=None."""
    engine = RuleEngine()
    pm1 = engine.score_pm1(gene_symbol="KRAS", hgvs_protein="p.G12D", consequence=None)
    assert pm1 is not None
    assert pm1.strength == "moderate"
def test_pm1_fires_at_supporting_on_domain_range() -> None:
    """A residue inside a domain range, but not a point hotspot, fires supporting.

    Residue 1751 lies inside BRCA1's BRCT domain (1646-1859).
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "BRCA1",
        "hgvs_protein": "p.Trp1751Arg",
        "consequence": "missense_variant",
    }
    pm1 = engine.score_pm1(**variant)
    assert pm1 is not None
    assert pm1.strength == "supporting"
def test_pm1_does_not_fire_outside_hotspot() -> None:
    """A TP53 missense outside the DNA-binding domain (residue 50) must not fire PM1."""
    engine = RuleEngine()
    pm1 = engine.score_pm1("TP53", "p.A50P", "missense_variant")
    assert pm1 is None
def test_pm1_does_not_fire_on_unknown_gene() -> None:
    """PM1 must not fire for a gene symbol the engine does not know."""
    engine = RuleEngine()
    pm1 = engine.score_pm1("UNKNOWN_GENE", "p.R100K", "missense_variant")
    assert pm1 is None
def test_pm1_skipped_for_synonymous() -> None:
    """PM1 is for missense only; a synonymous change at a hotspot must not fire.

    Synonymous variants don't change the protein and shouldn't pile evidence
    onto the hotspot residue.
    """
    engine = RuleEngine()
    pm1 = engine.score_pm1("TP53", "p.Arg175=", "synonymous_variant")
    assert pm1 is None
def test_pm1_handles_three_letter_and_one_letter_hgvs() -> None:
    """PM1 must resolve the residue from several HGVS protein notations.

    Covers three-letter (p.Arg175His), one-letter (p.R175H) and the
    accession-prefixed, parenthesised form.
    """
    engine = RuleEngine()
    notations = ("p.Arg175His", "p.R175H", "NP_000537.3:p.(Arg175His)")
    # Score every notation first, then check the results.
    results = [
        engine.score_pm1("TP53", notation, "missense_variant")
        for notation in notations
    ]
    for pm1 in results:
        assert pm1 is not None
        assert pm1.strength == "moderate"
def test_pm1_handles_missing_protein() -> None:
    """PM1 returns None when the protein HGVS is None or an empty string."""
    engine = RuleEngine()
    without_protein = engine.score_pm1("TP53", None, "missense_variant")
    assert without_protein is None
    empty_protein = engine.score_pm1("TP53", "", "missense_variant")
    assert empty_protein is None
def test_score_all_threads_hgvs_protein_for_pm1() -> None:
    """score_all must forward hgvs_protein so that PM1 can fire (with PM2)."""
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    evidence = EvidenceBundle(population_frequency=rare, insilico=InSilicoResult())
    scored = engine.score_all(
        evidence,
        gene_symbol="TP53",
        consequence="missense_variant",
        hgvs_protein="p.R248Q",
    )
    codes = {crit.code for crit in scored}
    assert "PM1" in codes
    assert "PM2" in codes
| # --- PS1 (same amino acid change as known pathogenic) ----------------------- | |
def test_ps1_fires_on_exact_match_tp53_r175h() -> None:
    """An exact match to a known pathogenic protein change fires PS1 at strong.

    TP53 R175H is one of the most-curated pathogenic variants ever.
    """
    engine = RuleEngine()
    variant = {
        "gene_symbol": "TP53",
        "hgvs_protein": "p.Arg175His",
        "consequence": "missense_variant",
    }
    ps1 = engine.score_ps1_pm5(**variant)
    assert ps1 is not None
    assert ps1.code == "PS1"
    assert ps1.strength == "strong"
def test_ps1_fires_on_one_letter_hgvs() -> None:
    """PS1 must fire for KRAS G12D given in one-letter HGVS notation."""
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5("KRAS", "p.G12D", "missense_variant")
    assert ps1 is not None
    assert ps1.code == "PS1"
def test_ps1_fires_on_braf_v600e() -> None:
    """BRAF p.V600E must fire PS1 at strong strength."""
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5("BRAF", "p.V600E", "missense_variant")
    assert ps1 is not None
    assert ps1.code == "PS1"
    assert ps1.strength == "strong"
| # --- PM5 (different change at same residue) --------------------------------- | |
def test_pm5_fires_when_residue_known_but_alt_different() -> None:
    """A novel alt at a residue with known pathogenic changes fires PM5 at moderate.

    TP53 R175 has known pathogenic R175H/C/L; the input p.R175P carries a
    different alt.
    """
    engine = RuleEngine()
    pm5 = engine.score_ps1_pm5("TP53", "p.R175P", "missense_variant")
    assert pm5 is not None
    assert pm5.code == "PM5"
    assert pm5.strength == "moderate"
def test_pm5_fires_on_braf_v600q() -> None:
    """BRAF V600Q fires PM5, not PS1 — V600 has known V600E/K/R/D only."""
    engine = RuleEngine()
    pm5 = engine.score_ps1_pm5("BRAF", "p.V600Q", "missense_variant")
    assert pm5 is not None
    assert pm5.code == "PM5"
| # --- Negative tests --------------------------------------------------------- | |
def test_ps1_pm5_does_not_fire_outside_known_residues() -> None:
    """Neither PS1 nor PM5 fires at TP53 codon 50, which has no known pathogenic entries."""
    engine = RuleEngine()
    result = engine.score_ps1_pm5("TP53", "p.A50P", "missense_variant")
    assert result is None
def test_ps1_pm5_does_not_fire_on_unknown_gene() -> None:
    """Neither PS1 nor PM5 fires for a gene symbol the engine does not know."""
    engine = RuleEngine()
    result = engine.score_ps1_pm5("UNKNOWN_GENE", "p.R100K", "missense_variant")
    assert result is None
def test_ps1_pm5_skipped_for_synonymous() -> None:
    """Neither PS1 nor PM5 fires for a synonymous change at a known residue."""
    engine = RuleEngine()
    result = engine.score_ps1_pm5("TP53", "p.Arg175=", "synonymous_variant")
    assert result is None
def test_ps1_pm5_skipped_for_stop_gain() -> None:
    """A stop-gain at a known pathogenic missense residue fires neither PS1 nor PM5.

    That case is PVS1's territory; PS1/PM5 are explicitly missense-scoped
    per Richards 2015.
    """
    engine = RuleEngine()
    with_consequence = engine.score_ps1_pm5("TP53", "p.R175*", "stop_gained")
    assert with_consequence is None
    # With the consequence unknown, the '*' alt alone should still suppress.
    without_consequence = engine.score_ps1_pm5("TP53", "p.R175*", None)
    assert without_consequence is None
def test_ps1_takes_precedence_over_pm5() -> None:
    """An exact known-pathogenic match yields PS1 only, never PS1 + PM5.

    This holds even though the residue has other known variants.
    """
    engine = RuleEngine()
    result = engine.score_ps1_pm5("TP53", "p.R175H", "missense_variant")
    assert result is not None
    assert result.code == "PS1"  # and not PM5
    # The scorer hands back one ACMGCriterion rather than a list, which
    # confirms it never emits both codes.
    assert isinstance(result, ACMGCriterion)
def test_score_all_threads_hgvs_protein_for_ps1() -> None:
    """score_all must forward hgvs_protein so that PS1 can fire (with PM2)."""
    engine = RuleEngine()
    rare = PopulationFrequency(overall_af=0.00001)
    evidence = EvidenceBundle(population_frequency=rare)
    scored = engine.score_all(
        evidence,
        gene_symbol="TP53",
        consequence="missense_variant",
        hgvs_protein="p.R175H",
    )
    codes = {crit.code for crit in scored}
    assert "PS1" in codes
    assert "PM2" in codes
| # --- Live ClinVar residue-neighbor evidence (PS1 / PM5) ---------------------- | |
| # These cover the runtime ClinVar same-residue lookup that supersedes the | |
| # hand-curated table for any variant where the pipeline pre-fetched | |
| # residue neighbors. The curated table remains the fallback for offline / | |
| # NCBI-failed paths. | |
def _neighbor(
    residue: int,
    alt: str,
    accession: str = "VCV000012345",
    stars: int = 2,
    classification: str = "Pathogenic",
) -> ClinVarResidueNeighbor:
    """Build a ClinVarResidueNeighbor fixture for the given residue and alt.

    The protein change is synthesised as ``p.X<residue><alt>``; the remaining
    arguments are passed through to the model unchanged.
    """
    fields = {
        "accession": accession,
        "residue": residue,
        "alt_aa": alt,
        "protein_change": f"p.X{residue}{alt}",
        "classification": classification,
        "stars": stars,
    }
    return ClinVarResidueNeighbor(**fields)
def test_ps1_fires_from_live_residue_neighbors() -> None:
    """A live ClinVar neighbor with the same alt at the same residue fires PS1.

    The criterion must be strong, and its source must quote the ClinVar
    accession rather than the curated table.
    """
    engine = RuleEngine()
    live = [_neighbor(175, "H", accession="VCV000012345", stars=3)]
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175H",
        consequence="missense_variant",
        residue_neighbors=live,
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    assert ps1.strength == "strong"
    assert "VCV000012345" in ps1.source
    assert "ClinVar" in ps1.source
def test_pm5_fires_from_live_residue_neighbors() -> None:
    """Live ClinVar neighbors with different alts at the same residue fire PM5.

    The criterion must be moderate and cite the first neighbor's accession.
    """
    engine = RuleEngine()
    known_h = _neighbor(175, "H", accession="VCV000001")
    known_c = _neighbor(175, "C", accession="VCV000002")
    pm5 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175P",  # alt P is absent from the neighbor list
        consequence="missense_variant",
        residue_neighbors=[known_h, known_c],
    )
    assert pm5 is not None
    assert pm5.code == "PM5"
    assert pm5.strength == "moderate"
    assert "ClinVar VCV000001" in pm5.source
def test_live_neighbors_supersede_curated_table_for_new_genes() -> None:
    """PS1 fires purely from live data for a gene absent from the curated table.

    This demonstrates that the runtime lookup catches variants the static
    known_pathogenic table doesn't cover.
    """
    engine = RuleEngine()
    live = [_neighbor(742, "F", accession="VCV000999", stars=2)]
    ps1 = engine.score_ps1_pm5(
        gene_symbol="OBSCURE_GENE",
        hgvs_protein="p.L742F",
        consequence="missense_variant",
        residue_neighbors=live,
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    # The source should cite the VCV accession, not a curated VCEP entry.
    assert "VCV000999" in ps1.source
def test_falls_back_to_curated_table_when_no_live_neighbors() -> None:
    """An empty live-neighbor list falls through to the curated table.

    TP53 R175H is in the curated table, so PS1 still fires. The source is
    expected to be the curated entry: it either does not mention ClinVar or
    it mentions IARC.
    """
    engine = RuleEngine()
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175H",
        consequence="missense_variant",
        residue_neighbors=[],
    )
    assert ps1 is not None
    assert ps1.code == "PS1"
    # Curated provenance, not a live ClinVar hit.
    source = ps1.source
    assert "ClinVar" not in source or "IARC" in source
def test_neither_fires_when_residue_has_no_neighbors_and_not_in_curated_table() -> None:
    """No live neighbors and no curated entry: neither PS1 nor PM5 fires."""
    engine = RuleEngine()
    result = engine.score_ps1_pm5(
        gene_symbol="OBSCURE_GENE",
        hgvs_protein="p.L742F",
        consequence="missense_variant",
        residue_neighbors=[],
    )
    assert result is None
def test_live_neighbors_only_filtered_to_correct_residue_by_pipeline() -> None:
    """Sanity check: an exact-alt match among same-residue neighbors gives PS1.

    The pipeline pre-filters neighbors by residue, so every neighbor passed
    here is at residue 175, the same residue as the input variant.
    """
    engine = RuleEngine()
    # Stand-in for what the pipeline would pass after filtering to residue 175.
    same_residue = [_neighbor(175, "H"), _neighbor(175, "C")]
    ps1 = engine.score_ps1_pm5(
        gene_symbol="TP53",
        hgvs_protein="p.R175C",  # matches the second neighbor exactly
        consequence="missense_variant",
        residue_neighbors=same_residue,
    )
    assert ps1 is not None
    assert ps1.code == "PS1"