"""prefilter.py — PeVe v1.1""" from __future__ import annotations from dataclasses import dataclass, field from typing import Optional from config import VEP_CONSEQUENCE_MAP, L3_SUBSTITUTION_INVALID @dataclass class VariantClass: raw_consequence: str all_consequences: list variant_class: str l3_substitution_valid: bool rna_priority: bool protein_priority: bool protein_deprioritised: bool transcript_conflict: bool out_of_scope: bool flags: list = field(default_factory=list) def classify_variant(ref, alt, vep_consequence, all_vep_consequences=None): if all_vep_consequences is None: all_vep_consequences = [vep_consequence] cons = vep_consequence.lower().strip() all_cons = [c.lower().strip() for c in all_vep_consequences] # MNV detection if len(ref) > 1 and len(alt) > 1 and len(ref) == len(alt): return VariantClass(cons, all_cons, "mnv", False, False, False, False, False, True, ["MNV: single-variant assessment may be incomplete"]) variant_class = VEP_CONSEQUENCE_MAP.get(cons, "unknown") if variant_class == "unknown": variant_class = _infer(ref, alt) mapped = {VEP_CONSEQUENCE_MAP.get(c, "unknown") for c in all_cons} tx_conflict = len(mapped) > 1 l3_valid = variant_class not in L3_SUBSTITUTION_INVALID rna_priority = variant_class == "canonical_splice" protein_priority = variant_class == "substitution_missense" protein_deprio = variant_class == "substitution_synonymous" out_of_scope = variant_class in {"utr_regulatory", "mnv", "unknown"} flags = [] if variant_class == "utr_regulatory": flags.append("UTR/regulatory: no mechanism pathway in PeVe v1.1.") if variant_class in {"frameshift", "stop_gained", "start_lost"}: flags.append(f"{variant_class}: Layer 3 substitution metrics NOT APPLICABLE.") if variant_class == "in_frame_indel": flags.append("In-frame indel: substitution biochemistry NOT APPLICABLE.") if variant_class == "deep_intronic": flags.append("Deep intronic: RNA interpretation down-prioritised.") if variant_class == "substitution_synonymous": flags.append("Synonymous: context signal alone cannot classify pathogenic.") if tx_conflict: flags.append("Transcript conflict: consequence differs across transcripts.") if variant_class == "unknown": flags.append("Variant class unknown — outputs are exploratory only.") return VariantClass(cons, all_cons, variant_class, l3_valid, rna_priority, protein_priority, protein_deprio, tx_conflict, out_of_scope, flags) def _infer(ref, alt): if len(ref) == 1 and len(alt) == 1: return "substitution_missense" diff = len(alt) - len(ref) if diff % 3 == 0: return "in_frame_indel" return "frameshift"