File size: 1,731 Bytes
3e219fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pytest

from backend.app.schemas.variant import VariantInput
from backend.app.services.normalization import VariantNormalizer


@pytest.fixture
def normalizer() -> VariantNormalizer:
    """Return a default-constructed ``VariantNormalizer`` for the tests below."""
    instance = VariantNormalizer()
    return instance


@pytest.mark.parametrize(
    argnames="raw,expected",
    argvalues=[
        ("NM_000548.5:c.4639A>T", "hgvs"),
        ("NC_000016.10:g.2138015A>T", "hgvs"),
        ("ENST00000219476:c.100C>T", "hgvs"),
        ("p.Arg100His", "protein"),
        ("p.Arg100*", "protein"),
        ("16-2138015-A-T", "vcf"),
        ("chr16-2138015-A-T", "vcf"),
        ("16:2138015:A:T", "vcf"),
        ("nonsense", "unknown"),
    ],
)
def test_detect_notation(normalizer: VariantNormalizer, raw: str, expected: str) -> None:
    """Check that ``detect_notation`` labels each sample string as expected.

    The cases pair a raw variant string with the label the normalizer
    must return for it: ``"hgvs"``, ``"protein"``, ``"vcf"`` or
    ``"unknown"``.
    """
    detected = normalizer.detect_notation(raw)
    assert detected == expected


def test_vcf_to_hgvs(normalizer: VariantNormalizer) -> None:
    """Check ``_vcf_to_hgvs`` output for dash-separated inputs.

    Covers a bare chromosome number, the same position with a ``chr``
    prefix, and chromosome ``X``.
    """
    # GRCh38 RefSeq accession — Mutalyzer v3 rejects `chr16:g.`
    cases = (
        ("16-2138015-A-T", "NC_000016.10:g.2138015A>T"),
        ("chr16-2138015-A-T", "NC_000016.10:g.2138015A>T"),
        ("X-100-G-A", "NC_000023.11:g.100G>A"),
    )
    for vcf_text, expected_hgvs in cases:
        assert normalizer._vcf_to_hgvs(vcf_text) == expected_hgvs


@pytest.mark.asyncio
async def test_normalize_unknown_passthrough(normalizer: VariantNormalizer) -> None:
    """Check that an unrecognised input is reported as a passthrough.

    The result must carry ``normalization_source == "passthrough"`` and
    at least one warning containing the text ``"unknown"``.
    """
    variant = VariantInput(raw="garbage")
    outcome = await normalizer.normalize(variant)

    assert outcome.normalization_source == "passthrough"
    mentions_unknown = any("unknown" in warning for warning in outcome.warnings)
    assert mentions_unknown


@pytest.mark.asyncio
async def test_normalize_protein_passthrough(normalizer: VariantNormalizer) -> None:
    """Check that a protein-notation input keeps its gene symbol and raw string.

    The supplied ``gene_symbol`` must come back unchanged, and the raw
    input must be exposed as ``hgvs_protein``.
    """
    variant = VariantInput(raw="p.Arg100His", gene_symbol="BRCA1")
    outcome = await normalizer.normalize(variant)

    assert outcome.gene_symbol == "BRCA1"
    assert outcome.hgvs_protein == "p.Arg100His"