varientlens / backend /tests /test_normalization.py
Codex
Initial VariantLens clinical readiness scaffold
3e219fa
import pytest
from backend.app.schemas.variant import VariantInput
from backend.app.services.normalization import VariantNormalizer
@pytest.fixture
def normalizer() -> VariantNormalizer:
    """Provide a ``VariantNormalizer`` constructed with no arguments."""
    instance = VariantNormalizer()
    return instance
@pytest.mark.parametrize(
    "raw,expected",
    [
        # Transcript / genomic HGVS expressions.
        ("NM_000548.5:c.4639A>T", "hgvs"),
        ("NC_000016.10:g.2138015A>T", "hgvs"),
        ("ENST00000219476:c.100C>T", "hgvs"),
        # Protein-level expressions.
        ("p.Arg100His", "protein"),
        ("p.Arg100*", "protein"),
        # VCF-like coordinates with different separators / chromosome prefixes.
        ("16-2138015-A-T", "vcf"),
        ("chr16-2138015-A-T", "vcf"),
        ("16:2138015:A:T", "vcf"),
        # Anything unrecognised.
        ("nonsense", "unknown"),
    ],
)
def test_detect_notation(normalizer: VariantNormalizer, raw: str, expected: str) -> None:
    """Each raw string must be classified with the expected notation label."""
    detected = normalizer.detect_notation(raw)
    assert detected == expected
def test_vcf_to_hgvs(normalizer: VariantNormalizer) -> None:
    """VCF-style strings must convert to genomic HGVS on RefSeq accessions."""
    # GRCh38 RefSeq accession — Mutalyzer v3 rejects `chr16:g.`
    expected_by_input = {
        "16-2138015-A-T": "NC_000016.10:g.2138015A>T",
        "chr16-2138015-A-T": "NC_000016.10:g.2138015A>T",
        "X-100-G-A": "NC_000023.11:g.100G>A",
    }
    # Dicts preserve insertion order, so cases are checked in the listed order.
    for vcf_text, hgvs_text in expected_by_input.items():
        assert normalizer._vcf_to_hgvs(vcf_text) == hgvs_text
@pytest.mark.asyncio
async def test_normalize_unknown_passthrough(normalizer: VariantNormalizer) -> None:
    """Unrecognised input is reported as passthrough with an "unknown" warning."""
    variant = VariantInput(raw="garbage")
    outcome = await normalizer.normalize(variant)

    assert outcome.normalization_source == "passthrough"
    # At least one warning message has to mention the unknown notation.
    assert any("unknown" in warning for warning in outcome.warnings)
@pytest.mark.asyncio
async def test_normalize_protein_passthrough(normalizer: VariantNormalizer) -> None:
    """Protein input keeps the supplied gene symbol and the protein HGVS string."""
    variant = VariantInput(raw="p.Arg100His", gene_symbol="BRCA1")
    outcome = await normalizer.normalize(variant)

    assert outcome.gene_symbol == "BRCA1"
    assert outcome.hgvs_protein == "p.Arg100His"