nileshhanotia commited on
Commit
44fb3c3
·
verified ·
1 Parent(s): fbd71c4

decision_engine.py

Browse files
Files changed (1) hide show
  1. decision_engine.py +262 -0
decision_engine.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """decision_engine.py — PeVe v1.1 Deterministic Synthesis Engine"""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional
5
+ import numpy as np
6
+ from config import (
7
+ PEVE_VERSION, THRESHOLD_VERSION,
8
+ SPLICE_PROB_HIGH, SPLICE_PROB_MODERATE, SPLICE_PROB_WEAK,
9
+ SPLICE_SIGNAL_MIN, SPLICE_DOMINANT_MIN,
10
+ ACTIVATION_NORM_HIGH, ACTIVATION_NORM_MODERATE, ACTIVATION_NORM_WEAK,
11
+ CONTEXT_ACTIVE_MIN, BIOCHEMICAL_RISK_ACTIVE,
12
+ AF_RARITY_THRESHOLD, AF_HIGH_CONFLICT,
13
+ BOUNDARY_TOLERANCE, WINDOW_BP, PEAK_OFF_CENTER_FRAC,
14
+ )
15
+ from prefilter import VariantClass
16
+ from af_handler import AFResult, AF_NUMERIC, AF_ZERO, AF_UNKNOWN, AF_UNCERTAIN
17
+
18
+ # ── Raw layer outputs ─────────────────────────────────────
19
@dataclass
class SpliceLayerOutput:
    """Raw outputs of the splice (RNA) layer handed to the decision engine.

    This is a plain value container; no validation is performed on
    construction.
    """

    # Banded and gated against the SPLICE_PROB_* / SPLICE_DOMINANT_MIN thresholds.
    splice_prob: float
    # Must reach SPLICE_SIGNAL_MIN for the RNA layer to count as active.
    splice_signal_strength: float
    counterfactual_delta: float
    # Opaque payload; may be None.
    saliency_map: Optional[object]
    model_available: bool = True
26
+
27
@dataclass
class ContextLayerOutput:
    """Raw outputs of the sequence-context layer handed to the decision engine.

    This is a plain value container; no validation is performed on
    construction.
    """

    context_pathogenic_prob: float
    # Banded against ACTIVATION_NORM_* and gated against CONTEXT_ACTIVE_MIN.
    activation_norm: float
    # Compared with the window centre (WINDOW_BP // 2) by _off_center().
    activation_peak_position: int
    importance_score: float
    model_available: bool = True
34
+
35
@dataclass
class ProteinLayerOutput:
    """Raw outputs of the protein (substitution) layer handed to the engine.

    This is a plain value container; no validation is performed on
    construction.
    """

    # Gated against BIOCHEMICAL_RISK_ACTIVE.
    biochemical_risk_score: float
    feature_pathogenic_prob: float
    shap_feature_contributions: dict
    # When False the protein layer can never be reported as active.
    l3_substitution_valid: bool
    model_available: bool = True
42
+
43
+ # ── Band classifiers ──────────────────────────────────────
44
def _splice_band(p):
    """Return the band label for splice probability ``p``.

    Thresholds are tested from highest to lowest (SPLICE_PROB_HIGH,
    SPLICE_PROB_MODERATE, SPLICE_PROB_WEAK); the first one that ``p`` meets
    or exceeds decides the label. Anything below the weak threshold is
    "Inactive".
    """
    if p >= SPLICE_PROB_HIGH:
        return "High"
    if p >= SPLICE_PROB_MODERATE:
        return "Moderate"
    if p >= SPLICE_PROB_WEAK:
        return "Weak"
    return "Inactive"
49
+
50
def _context_band(n):
    """Return the band label for context activation norm ``n``.

    Thresholds are tested from highest to lowest (ACTIVATION_NORM_HIGH,
    ACTIVATION_NORM_MODERATE, ACTIVATION_NORM_WEAK); the first one that
    ``n`` meets or exceeds decides the label. Anything below the weak
    threshold is "Inactive".
    """
    if n >= ACTIVATION_NORM_HIGH:
        return "High"
    if n >= ACTIVATION_NORM_MODERATE:
        return "Moderate"
    if n >= ACTIVATION_NORM_WEAK:
        return "Weak"
    return "Inactive"
55
+
56
def _near(val, thresh):
    """Return True when ``val`` lies within BOUNDARY_TOLERANCE of ``thresh``.

    The comparison is inclusive on both sides of the threshold.
    """
    return abs(val - thresh) <= BOUNDARY_TOLERANCE
57
def _off_center(pos):
    """Return True when ``pos`` is too far from the centre of the window.

    The centre is ``WINDOW_BP // 2``; the allowed distance is
    ``int(WINDOW_BP * PEAK_OFF_CENTER_FRAC)`` (truncated toward zero), and
    only a strictly larger distance counts as off-centre.
    """
    centre = WINDOW_BP // 2
    allowed = int(WINDOW_BP * PEAK_OFF_CENTER_FRAC)
    return abs(pos - centre) > allowed
58
+
59
+ # ── Activation levels ─────────────────────────────────────
60
@dataclass
class ActivationLevels:
    """Per-layer activation and boundary flags derived from raw layer outputs.

    Field order matters: compute_activation_levels() historically built this
    class positionally, so the declaration order is kept unchanged.
    """

    # RNA / splice layer
    splice_band: str
    rna_active: bool
    rna_dominant: bool
    # Sequence-context layer
    context_band: str
    context_active: bool
    # Protein layer
    protein_active: bool
    l3_valid: bool
    # True when a layer's score sits within tolerance of a decision threshold
    rna_boundary: bool
    context_boundary: bool
    protein_boundary: bool
66
+
67
def compute_activation_levels(splice, context, protein, af_result):
    """Derive activation and boundary flags for the three layers.

    Args:
        splice: object exposing ``splice_prob`` and ``splice_signal_strength``.
        context: object exposing ``activation_norm``.
        protein: object exposing ``l3_substitution_valid`` and
            ``biochemical_risk_score``.
        af_result: object exposing ``satisfies_rarity()``; it is only called
            when the two protein conditions already hold.

    Returns:
        ActivationLevels populated from threshold comparisons only; no
        scores are combined or averaged.
    """
    s_band = _splice_band(splice.splice_prob)
    # RNA is active only when both the probability and the signal gate pass.
    rna_active = (
        splice.splice_prob >= SPLICE_PROB_MODERATE
        and splice.splice_signal_strength >= SPLICE_SIGNAL_MIN
    )
    rna_dominant = splice.splice_prob >= SPLICE_DOMINANT_MIN

    c_band = _context_band(context.activation_norm)
    ctx_active = context.activation_norm >= CONTEXT_ACTIVE_MIN

    # bool() keeps the field a real boolean even if l3_substitution_valid or
    # satisfies_rarity() yields a falsy non-bool such as None.
    prot_active = bool(
        protein.l3_substitution_valid
        and protein.biochemical_risk_score >= BIOCHEMICAL_RISK_ACTIVE
        and af_result.satisfies_rarity()
    )

    # Boundary flags: a score within tolerance of any threshold that feeds
    # the corresponding activation decision.
    rna_b = (
        _near(splice.splice_prob, SPLICE_PROB_MODERATE)
        or _near(splice.splice_prob, SPLICE_DOMINANT_MIN)
        or _near(splice.splice_signal_strength, SPLICE_SIGNAL_MIN)
    )
    ctx_b = (
        _near(context.activation_norm, CONTEXT_ACTIVE_MIN)
        or _near(context.activation_norm, ACTIVATION_NORM_HIGH)
    )
    pro_b = _near(protein.biochemical_risk_score, BIOCHEMICAL_RISK_ACTIVE)

    return ActivationLevels(
        splice_band=s_band,
        rna_active=rna_active,
        rna_dominant=rna_dominant,
        context_band=c_band,
        context_active=ctx_active,
        protein_active=prot_active,
        l3_valid=protein.l3_substitution_valid,
        rna_boundary=rna_b,
        context_boundary=ctx_b,
        protein_boundary=pro_b,
    )
81
+
82
+ # ── Conflict detection ────────────────────────────────────
83
@dataclass
class ConflictReport:
    """Collected major/minor conflict messages plus the derived review flag.

    The score and flag fields are only meaningful after
    compute_review_flag() has been called; call it again after appending
    further conflicts.
    """

    major_conflicts: list = field(default_factory=list)
    minor_conflicts: list = field(default_factory=list)
    requires_manual_review: bool = False
    conflict_score_major: int = 0
    conflict_score_minor: int = 0

    def compute_review_flag(self):
        """Recount conflicts and refresh ``requires_manual_review``.

        Manual review is required with at least one major conflict or at
        least two minor conflicts.
        """
        self.conflict_score_major = len(self.major_conflicts)
        self.conflict_score_minor = len(self.minor_conflicts)
        self.requires_manual_review = (
            self.conflict_score_major >= 1 or self.conflict_score_minor >= 2
        )
95
+
96
def detect_conflicts(splice, context, protein, af_result, activation, variant_class):
    """Collect major and minor conflicts between layer outputs and annotations.

    Major conflicts:
      * high splice probability on a variant flagged as high-AF,
      * high biochemical risk (with a valid L3 substitution) on a variant
        flagged as high-AF,
      * canonical splice annotation while the RNA layer is inactive.

    Minor conflicts:
      * any layer score within BOUNDARY_TOLERANCE of a threshold,
      * context activation peak off-centre,
      * context active on a low-impact consequence,
      * AF state unknown or uncertain.

    Returns:
        ConflictReport with ``compute_review_flag()`` already applied.
    """

    def _af_text():
        # build_narrative() treats global_af as possibly None; applying
        # ':.5f' to None would raise TypeError, so fall back to the AF state.
        af = af_result.global_af
        return f"{af:.5f}" if af is not None else f"{af_result.state}"

    report = ConflictReport()

    # ── Major conflicts ──
    if splice.splice_prob >= SPLICE_PROB_HIGH and af_result.triggers_high_af_conflict():
        report.major_conflicts.append(
            f"MAJOR: High splice_prob ({splice.splice_prob:.3f}) + common variant (AF={_af_text()}). "
            "Splice-disrupting variant unlikely at this population frequency."
        )

    if (
        protein.l3_substitution_valid
        and protein.biochemical_risk_score >= BIOCHEMICAL_RISK_ACTIVE
        and af_result.triggers_high_af_conflict()
    ):
        report.major_conflicts.append(
            f"MAJOR: High biochemical risk ({protein.biochemical_risk_score:.3f}) + common variant "
            f"(AF={_af_text()}). Common biochemically disruptive variants are typically tolerated."
        )

    if variant_class.variant_class == "canonical_splice" and not activation.rna_active:
        report.major_conflicts.append(
            f"MAJOR: Canonical splice site ({variant_class.raw_consequence}) but RNA model inactive "
            f"(splice_prob={splice.splice_prob:.3f}). Model/annotation disagreement."
        )

    # ── Minor conflicts ──
    # All boundary hits are folded into a single minor-conflict entry.
    bnd = []
    if activation.rna_boundary:
        bnd.append(f"splice_prob({splice.splice_prob:.3f})/signal({splice.splice_signal_strength:.3f})")
    if activation.context_boundary:
        bnd.append(f"activation_norm({context.activation_norm:.3f})")
    if activation.protein_boundary:
        bnd.append(f"biochemical_risk({protein.biochemical_risk_score:.3f})")
    if bnd:
        report.minor_conflicts.append(
            f"MINOR: Boundary proximity — {'; '.join(bnd)} within ±{BOUNDARY_TOLERANCE}."
        )

    if _off_center(context.activation_peak_position):
        offset = abs(context.activation_peak_position - WINDOW_BP // 2)
        report.minor_conflicts.append(
            f"MINOR: Activation peak {offset}bp from mutation centre (pos={context.activation_peak_position})."
        )

    low_impact = {
        "synonymous_variant",
        "intron_variant",
        "upstream_gene_variant",
        "downstream_gene_variant",
    }
    if activation.context_active and variant_class.raw_consequence in low_impact:
        report.minor_conflicts.append(
            f"MINOR: Context active (norm={context.activation_norm:.3f}) but VEP='{variant_class.raw_consequence}' (low impact)."
        )

    if af_result.state in {AF_UNKNOWN, AF_UNCERTAIN}:
        report.minor_conflicts.append(f"MINOR: AF state={af_result.state} — rarity unconfirmed.")

    report.compute_review_flag()
    return report
127
+
128
+ # ── Mechanism constants ───────────────────────────────────
129
# Labels stored in SynthesisResult.dominant_mechanism / supporting_mechanisms.
# Underscores are replaced with spaces when the narrative is rendered.
DOMINANT_RNA = "RNA_Splicing"
DOMINANT_PROTEIN = "Protein_Biochemical"
DOMINANT_CONTEXT = "Sequence_Context"
DOMINANT_AMBIGUITY = "Mechanism_Ambiguity"
DOMINANT_TRUNCATION = "Protein_Truncation"
DOMINANT_INSUFFICIENT = "Insufficient_Evidence"
DOMINANT_OOS = "Out_Of_Scope"
DOMINANT_CONFLICT_REVIEW = "Conflict_Manual_Review"
137
+
138
+ # ── Synthesis result ──────────────────────────────────────
139
@dataclass
class SynthesisResult:
    """Outcome of synthesize(): the call, its evidence and its audit trail."""

    # One of the DOMINANT_* labels.
    dominant_mechanism: str
    # Human-readable classification string.
    final_classification: str
    # DOMINANT_* labels of secondary mechanisms (may be empty).
    supporting_mechanisms: list
    activation_levels: ActivationLevels
    conflict_report: ConflictReport
    # Ordered, human-readable trace of the rules that fired.
    reasoning_steps: list
    # Copied from the variant class's transcript_conflict flag.
    transcript_ambiguity: bool
    # True when the AF state is unknown or uncertain.
    af_uncertainty: bool
    version: str = PEVE_VERSION
    threshold_version: str = THRESHOLD_VERSION
151
+
152
def _mkr(dom, cls, sup, act, conf, steps, vc, af):
    """Assemble a SynthesisResult from the pieces gathered by synthesize().

    ``vc`` supplies ``transcript_conflict`` and ``af`` supplies ``state``;
    the AF-uncertainty flag is set when that state is AF_UNKNOWN or
    AF_UNCERTAIN. Version fields take their dataclass defaults.
    """
    return SynthesisResult(
        dominant_mechanism=dom,
        final_classification=cls,
        supporting_mechanisms=sup,
        activation_levels=act,
        conflict_report=conf,
        reasoning_steps=steps,
        transcript_ambiguity=vc.transcript_conflict,
        af_uncertainty=af.state in {AF_UNKNOWN, AF_UNCERTAIN},
    )
155
+
156
+ # ── Main synthesis ────────────────────────────────────────
157
def synthesize(splice, context, protein, af_result, variant_class):
    """Run the deterministic rule cascade and return a SynthesisResult.

    Rules are evaluated in a fixed order and the first match returns:

      1. Conflict override: any major conflict suppresses classification.
      2. Out-of-scope variant classes.
      3. Truncation gate (frameshift / stop_gained / start_lost).
      4. Rule 1:  RNA dominant.
      5. Rule 1b: RNA active and protein active, reported as ambiguity.
      6. Rule 2:  protein active (RNA is necessarily inactive here).
      7. Rule 3:  context active, unless the variant is synonymous.
      8. Rule 4:  nothing active; upgraded to manual review when the minor
         conflict threshold was reached.

    Every step appends a human-readable line to ``reasoning_steps``.
    """
    act = compute_activation_levels(splice, context, protein, af_result)
    conf = detect_conflicts(splice, context, protein, af_result, act, variant_class)
    steps = []
    sup = []

    # Conflict override
    if conf.requires_manual_review and conf.conflict_score_major >= 1:
        steps.append(
            f"CONFLICT OVERRIDE: {conf.conflict_score_major} major conflict(s). Classification suppressed."
        )
        return _mkr(DOMINANT_CONFLICT_REVIEW, "Conflict — Manual Review Required",
                    [], act, conf, steps, variant_class, af_result)

    # Out of scope
    if variant_class.out_of_scope:
        steps.append(f"Variant class '{variant_class.variant_class}' is outside PeVe v1.1 scope.")
        return _mkr(DOMINANT_OOS, "Out of Scope — See Flags",
                    [], act, conf, steps, variant_class, af_result)

    # Truncation gate
    if variant_class.variant_class in {"frameshift", "stop_gained", "start_lost"}:
        steps.append(
            f"Variant class '{variant_class.variant_class}' — protein truncation. L3 substitution metrics excluded."
        )
        if act.rna_active:
            steps.append(
                f"RNA also active (splice_prob={splice.splice_prob:.3f}) — possible NMD-relevant splice signal."
            )
            sup.append(DOMINANT_RNA)
        return _mkr(DOMINANT_TRUNCATION, "Protein Truncation",
                    sup, act, conf, steps, variant_class, af_result)

    # Informational only: recorded in the trace, does not change rule order.
    if variant_class.transcript_conflict:
        steps.append("Transcript conflict: consequence differs across transcripts. Both mechanisms elevated.")

    # Rule 1: RNA High → dominant
    if act.rna_dominant:
        steps.append(
            f"RULE 1: RNA HIGH (splice_prob={splice.splice_prob:.3f}≥{SPLICE_DOMINANT_MIN}, signal={splice.splice_signal_strength:.3f}). RNA dominant."
        )
        if act.protein_active:
            sup.append(DOMINANT_PROTEIN)
            steps.append(f" Supporting: Protein active (risk={protein.biochemical_risk_score:.3f}).")
        if act.context_active:
            sup.append(DOMINANT_CONTEXT)
            steps.append(f" Supporting: Context active (norm={context.activation_norm:.3f}).")
        return _mkr(DOMINANT_RNA, "Pathogenic — RNA Splice Mechanism",
                    sup, act, conf, steps, variant_class, af_result)

    # Rule 1b: RNA Moderate + Protein Active → ambiguity
    if act.rna_active and act.protein_active:
        steps.append(
            f"RULE 1b: RNA MODERATE (splice_prob={splice.splice_prob:.3f}) + Protein ACTIVE (risk={protein.biochemical_risk_score:.3f}). Mechanism Ambiguity."
        )
        return _mkr(DOMINANT_AMBIGUITY, "Mechanism Ambiguity — Manual Review Recommended",
                    [DOMINANT_RNA, DOMINANT_PROTEIN], act, conf, steps, variant_class, af_result)

    # Rule 2: Protein dominant
    # Rule 1b already returned for "RNA active + protein active", so RNA is
    # guaranteed inactive here; the former nested `if act.rna_active:` branch
    # could never execute and has been removed.
    if act.protein_active:
        steps.append(
            f"RULE 2: RNA inactive. Protein ACTIVE (risk={protein.biochemical_risk_score:.3f}, AF={af_result.global_af})."
        )
        if act.context_active:
            sup.append(DOMINANT_CONTEXT)
            steps.append(f" Supporting: Context active (norm={context.activation_norm:.3f}).")
        return _mkr(DOMINANT_PROTEIN, "Pathogenic — Protein Biochemical Mechanism",
                    sup, act, conf, steps, variant_class, af_result)

    # Rule 3: Context dominant
    if act.context_active:
        if variant_class.variant_class == "substitution_synonymous":
            # Blocked: record the reason and fall through to Rule 4.
            steps.append(
                "RULE 3 BLOCKED: Context active but synonymous variant — context alone cannot classify pathogenic."
            )
        else:
            steps.append(f"RULE 3: RNA+Protein inactive. Context ACTIVE (norm={context.activation_norm:.3f}).")
            return _mkr(DOMINANT_CONTEXT, "Uncertain — Sequence Context Signal Only",
                        [], act, conf, steps, variant_class, af_result)

    # Rule 4: Insufficient evidence
    steps.append(
        f"RULE 4: No mechanism active. RNA={act.splice_band} Context={act.context_band} "
        f"Protein active={act.protein_active} (L3 valid={act.l3_valid}, rare={af_result.satisfies_rarity()})."
    )
    if conf.requires_manual_review:
        steps.append(
            f"Minor conflict threshold reached ({conf.conflict_score_minor} minor). Upgrading to Review."
        )
        return _mkr(DOMINANT_CONFLICT_REVIEW, "Conflict — Manual Review Required",
                    [], act, conf, steps, variant_class, af_result)

    return _mkr(DOMINANT_INSUFFICIENT, "Likely Benign or Insufficient Evidence",
                [], act, conf, steps, variant_class, af_result)
226
+
227
+ # ── Narrative builder ─────────────────────────────────────
228
def build_narrative(result, splice, context, protein, af_result, variant_class):
    """Render a synthesis result as a multi-line, human-readable report.

    Args:
        result: SynthesisResult produced by synthesize().
        splice, context, protein: the raw layer outputs that fed ``result``.
        af_result: AF result; ``global_af`` may be None, in which case the
            AF state is printed instead.
        variant_class: pre-filter classification (``variant_class``, ``flags``).

    Returns:
        The report as a single newline-joined string.
    """
    act = result.activation_levels
    report = result.conflict_report
    rule = "=" * 60

    def _status(active):
        # Shared suffix for the three per-layer summary lines.
        return "ACTIVE." if active else "INACTIVE."

    lines = [f"PeVe v{PEVE_VERSION} Structured Reasoning Narrative", rule]
    lines.append(f"Variant class: {variant_class.variant_class.replace('_',' ').title()}")
    lines.append(
        f"RNA: splice_prob={splice.splice_prob:.3f} (band={act.splice_band}), "
        f"signal={splice.splice_signal_strength:.3f}. "
        + _status(act.rna_active)
    )
    lines.append(
        f"Context: activation_norm={context.activation_norm:.3f} (band={act.context_band}). "
        + _status(act.context_active)
    )

    if act.l3_valid:
        if af_result.global_af is not None:
            af_str = f"AF={af_result.global_af:.6f}"
        else:
            af_str = f"AF_state={af_result.state}"
        lines.append(
            f"Protein: biochemical_risk={protein.biochemical_risk_score:.3f}, {af_str}. "
            + _status(act.protein_active)
        )
    else:
        lines.append("Protein substitution metrics: NOT APPLICABLE for this variant class.")

    lines.append("")
    lines.append(f"Dominant mechanism: {result.dominant_mechanism.replace('_',' ')}")
    lines.append(f"Final classification: {result.final_classification}")
    if result.supporting_mechanisms:
        lines.append(
            f"Supporting: {', '.join(m.replace('_',' ') for m in result.supporting_mechanisms)}"
        )

    if report.major_conflicts:
        lines.append("\nMAJOR CONFLICTS:")
        lines.extend(f" • {c}" for c in report.major_conflicts)
    if report.minor_conflicts:
        lines.append("MINOR CONFLICTS / BOUNDARY FLAGS:")
        lines.extend(f" • {c}" for c in report.minor_conflicts)
    if result.transcript_ambiguity:
        lines.append("⚠ Transcript conflict: consequence differs across transcripts.")
    if variant_class.flags:
        lines.append("\nPre-filter flags:")
        lines.extend(f" • {flag}" for flag in variant_class.flags)
    if report.requires_manual_review:
        lines.append("\n⛔ MANUAL REVIEW REQUIRED.")

    lines.append(rule)
    lines.append(f"PeVe v{PEVE_VERSION} | Thresholds {THRESHOLD_VERSION} | No probability averaging.")
    return "\n".join(lines)