Spaces:
Running
Running
| """prefilter.py — PeVe v1.1""" | |
| from __future__ import annotations | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from config import VEP_CONSEQUENCE_MAP, L3_SUBSTITUTION_INVALID | |
@dataclass
class VariantClass:
    """Classification record returned by ``classify_variant``.

    ``classify_variant`` builds instances positionally, so the order of the
    fields below is part of the contract and must not be rearranged.
    """

    # Primary consequence term after lower-casing and stripping.
    raw_consequence: str
    # All consequence terms supplied for the variant, normalised the same way.
    all_consequences: list
    # Internal class label, e.g. "mnv", "frameshift", "canonical_splice".
    variant_class: str
    # False for MNVs and for classes listed in L3_SUBSTITUTION_INVALID.
    l3_substitution_valid: bool
    # True only when variant_class is "canonical_splice".
    rna_priority: bool
    # True only when variant_class is "substitution_missense".
    protein_priority: bool
    # True only when variant_class is "substitution_synonymous".
    protein_deprioritised: bool
    # True when the supplied consequences map to more than one class.
    transcript_conflict: bool
    # True for MNVs and for the "utr_regulatory" / "unknown" classes.
    out_of_scope: bool
    # Human-readable caveats; each instance gets its own fresh list.
    flags: list = field(default_factory=list)
def classify_variant(ref, alt, vep_consequence, all_vep_consequences=None):
    """Classify a variant from its alleles and VEP consequence term(s).

    Args:
        ref: Reference allele. Only its length is used, for MNV detection
            and for the allele-based fallback in ``_infer``.
        alt: Alternate allele. Used the same way as ``ref``.
        vep_consequence: Primary consequence term. It is lower-cased and
            stripped before being looked up in ``VEP_CONSEQUENCE_MAP``.
        all_vep_consequences: Optional iterable of consequence terms
            (presumably one per transcript -- TODO confirm). When omitted,
            a one-element list holding ``vep_consequence`` is used.

    Returns:
        A ``VariantClass`` carrying the normalised consequences, the class
        label, the derived priority/scope booleans and any caveat flags.
    """
    if all_vep_consequences is None:
        consequences = [vep_consequence]
    else:
        consequences = all_vep_consequences
    primary = vep_consequence.lower().strip()
    normalised = [term.lower().strip() for term in consequences]

    # Equal-length alleles longer than one base are multi-nucleotide
    # variants; they short-circuit every other rule.
    if 1 < len(ref) == len(alt):
        return VariantClass(
            raw_consequence=primary,
            all_consequences=normalised,
            variant_class="mnv",
            l3_substitution_valid=False,
            rna_priority=False,
            protein_priority=False,
            protein_deprioritised=False,
            transcript_conflict=False,
            out_of_scope=True,
            flags=["MNV: single-variant assessment may be incomplete"],
        )

    # Map the primary consequence; fall back to allele lengths when unmapped.
    label = VEP_CONSEQUENCE_MAP.get(primary, "unknown")
    if label == "unknown":
        label = _infer(ref, alt)

    # A conflict means the supplied consequences map to more than one class
    # (unmapped terms all count as the single class "unknown").
    distinct_classes = {
        VEP_CONSEQUENCE_MAP.get(term, "unknown") for term in normalised
    }
    conflicting = len(distinct_classes) > 1

    substitution_ok = label not in L3_SUBSTITUTION_INVALID
    is_out_of_scope = label in {"utr_regulatory", "mnv", "unknown"}

    # At most one class-specific note applies because label is a single value.
    l3_note = f"{label}: Layer 3 substitution metrics NOT APPLICABLE."
    class_note = {
        "utr_regulatory": "UTR/regulatory: no mechanism pathway in PeVe v1.1.",
        "frameshift": l3_note,
        "stop_gained": l3_note,
        "start_lost": l3_note,
        "in_frame_indel": "In-frame indel: substitution biochemistry NOT APPLICABLE.",
        "deep_intronic": "Deep intronic: RNA interpretation down-prioritised.",
        "substitution_synonymous": "Synonymous: context signal alone cannot classify pathogenic.",
    }.get(label)

    notes = []
    if class_note is not None:
        notes.append(class_note)
    if conflicting:
        notes.append("Transcript conflict: consequence differs across transcripts.")
    # NOTE(review): _infer as defined in this file never returns "unknown",
    # so this branch looks unreachable -- confirm the intended behaviour.
    if label == "unknown":
        notes.append("Variant class unknown — outputs are exploratory only.")

    return VariantClass(
        raw_consequence=primary,
        all_consequences=normalised,
        variant_class=label,
        l3_substitution_valid=substitution_ok,
        rna_priority=(label == "canonical_splice"),
        protein_priority=(label == "substitution_missense"),
        protein_deprioritised=(label == "substitution_synonymous"),
        transcript_conflict=conflicting,
        out_of_scope=is_out_of_scope,
        flags=notes,
    )
| def _infer(ref, alt): | |
| if len(ref) == 1 and len(alt) == 1: | |
| return "substitution_missense" | |
| diff = len(alt) - len(ref) | |
| if diff % 3 == 0: | |
| return "in_frame_indel" | |
| return "frameshift" | |