"""Tests for notation detection and normalization in ``VariantNormalizer``."""

import pytest

from backend.app.schemas.variant import VariantInput
from backend.app.services.normalization import VariantNormalizer


@pytest.fixture
def normalizer() -> VariantNormalizer:
    """Provide a freshly constructed ``VariantNormalizer`` to each test."""
    instance = VariantNormalizer()
    return instance


@pytest.mark.parametrize(
    "raw,expected",
    [
        # Transcript- and genome-level HGVS strings.
        ("NM_000548.5:c.4639A>T", "hgvs"),
        ("NC_000016.10:g.2138015A>T", "hgvs"),
        ("ENST00000219476:c.100C>T", "hgvs"),
        # Bare protein-level changes.
        ("p.Arg100His", "protein"),
        ("p.Arg100*", "protein"),
        # VCF-style coordinates with different separators / chr prefix.
        ("16-2138015-A-T", "vcf"),
        ("chr16-2138015-A-T", "vcf"),
        ("16:2138015:A:T", "vcf"),
        # Anything unrecognised.
        ("nonsense", "unknown"),
    ],
)
def test_detect_notation(normalizer: VariantNormalizer, raw: str, expected: str) -> None:
    """``detect_notation`` labels each raw string with the expected notation."""
    detected = normalizer.detect_notation(raw)
    assert detected == expected


def test_vcf_to_hgvs(normalizer: VariantNormalizer) -> None:
    """``_vcf_to_hgvs`` rewrites VCF-style input as genomic HGVS."""
    # The expected strings use the GRCh38 RefSeq accession because
    # Mutalyzer v3 rejects the `chr16:g.` form.
    conversions = (
        ("16-2138015-A-T", "NC_000016.10:g.2138015A>T"),
        ("chr16-2138015-A-T", "NC_000016.10:g.2138015A>T"),
        ("X-100-G-A", "NC_000023.11:g.100G>A"),
    )
    for vcf_text, hgvs_text in conversions:
        assert normalizer._vcf_to_hgvs(vcf_text) == hgvs_text


@pytest.mark.asyncio
async def test_normalize_unknown_passthrough(normalizer: VariantNormalizer) -> None:
    """Unrecognised input is passed through and flagged with a warning."""
    variant = VariantInput(raw="garbage")
    outcome = await normalizer.normalize(variant)

    assert outcome.normalization_source == "passthrough"
    # At least one warning must mention the "unknown" notation.
    assert any("unknown" in warning for warning in outcome.warnings)


@pytest.mark.asyncio
async def test_normalize_protein_passthrough(normalizer: VariantNormalizer) -> None:
    """Protein-level input keeps its gene symbol and protein HGVS string."""
    variant = VariantInput(raw="p.Arg100His", gene_symbol="BRCA1")
    outcome = await normalizer.normalize(variant)

    assert outcome.gene_symbol == "BRCA1"
    assert outcome.hgvs_protein == "p.Arg100His"