# N2N-Precision-Engine / core / nucleotide_classifier.py
# ManavVanga's picture
# Add nucleotide classifier
# 60d6f33 verified
"""
N2N Precision Engine — Nucleotide Slipperiness Classifier
==========================================================
Inventor : Manav Vanga
Date : March 2, 2026
Patent : Claim 2 — The +4 Nucleotide Clinical Decision Rule
SCIENTIFIC BASIS
────────────────
The ±15bp window around a Premature Termination Codon (PTC) encodes the
complete ribosomal decision context. Within this window, the nucleotide at
position +4 (immediately 3′ of the stop codon) is the single most influential
determinant of near-cognate tRNA suppression efficiency.
+4 Clinical Decision Table (Core Patent Claim):
┌──────────┬────────────┬─────────────────────────────────┬────────────────────────────────┐
│ +4 Base  │ Road Type  │ Therapy Strategy                │ Mechanism                      │
├──────────┼────────────┼─────────────────────────────────┼────────────────────────────────┤
│ C        │ Slippery   │ Small Molecule (Pill)           │ Grease the wheels              │
│ A        │ Smooth     │ Small Molecule + NMD Inhibitor  │ Smooth road + stop cleanup crew│
│ G        │ Sticky     │ ACE-tRNA (Biologic)             │ Crane over the wall            │
│ U / T    │ Rough      │ Combination Therapy             │ Total road reconstruction      │
└──────────┴────────────┴─────────────────────────────────┴────────────────────────────────┘
EVIDENCE CITATIONS
──────────────────
[1] Manuvakhova M et al. (2000). "Aminoglycoside antibiotics mediate
context-dependent suppression of termination codons in a mammalian
translation system." Mol Biol Cell. PMID: 10767574
[2] Bidou L et al. (2004). "Sense from nonsense: therapies for premature
stop codon diseases." Trends Mol Med. PMID: 15381195
[3] Karijolich J, Yu YT (2014). "Therapeutic suppression of premature
termination codons." Curr Opin Genet Dev. PMID: 24613397
[4] Dabrowski M et al. (2018). "Identification of novel small molecule
readthrough agents." Nucleic Acids Res. PMID: 29036571
"""
from __future__ import annotations
from dataclasses import dataclass, field
# ── Per-nucleotide base slipperiness ─────────────────────────────────────────
# Biophysical scores (0 = maximally solid, 1 = maximally slippery)
# Derived from near-cognate tRNA competition and ribosomal pause data
# Keys are single uppercase base letters. NucleotideClassifier.classify looks
# each window character up here and uses 0.50 for any character not listed.
SLIP_SCORES: dict[str, float] = {
    "C": 0.82,  # Weakest Watson-Crick pairing in near-cognate context
    "A": 0.61,  # Moderate — purine stacking assists partial read-through
    "G": 0.19,  # Strongest stacking — most solid anchor
    "U": 0.34,  # Strong pairing, hard to bypass
    "T": 0.34,  # DNA representation of U
    "N": 0.50,  # Unknown base — neutral
}
# Position weight multipliers across the 30bp window (index 0 = position -15)
# Peak weight at +4 (index 19), elevated weights at -3 to +6 (Kozak/context zone)
# The list holds exactly 30 entries, one per window index; classify() reads
# POSITION_WEIGHTS[i] for the base at window index i.
POSITION_WEIGHTS: list[float] = [
    0.20, 0.22, 0.24, 0.26, 0.28,  # -15 to -11
    0.32, 0.36, 0.42, 0.50, 0.58,  # -10 to -6
    0.65, 0.72, 0.80, 0.88, 0.95,  # -5 to -1
    1.00, 1.00, 1.00,  # 0 +1 +2 (stop codon)
    1.00, 1.80,  # +3 +4 ← +4 peak weight
    1.40, 1.20, 1.00, 0.85, 0.72,  # +5 to +9
    0.60, 0.50, 0.42, 0.36, 0.28,  # +10 to +14
]
@dataclass
class NucleotideReport:
    """Full slipperiness analysis for a 30bp window.

    Instances are produced by ``NucleotideClassifier.classify``. The two
    trailing list fields default to fresh empty lists per instance.
    """

    # The analysed window as stored by the producer (classify() stores the
    # upper-cased, T→U normalised form).
    window: str
    # Per-position breakdown: one dict per window index.
    position_scores: list[dict]
    # Character found at the +4 position (normally C / A / G / U).
    plus4_base: str
    # Slippery / Smooth / Sticky / Rough
    plus4_road_type: str
    # Recommended therapy class
    plus4_therapy: str
    # Plain-English analogy
    plus4_analogy: str
    # Key for 3D animation scene
    plus4_animation_key: str
    # Weighted RFC from window alone (0–1)
    window_rfc: float
    # Supporting citation strings for the selected +4 rule.
    evidence_citations: list[str] = field(default_factory=list)
    # Drug candidate records (name / class / status / trial / pubmed).
    drug_candidates: list[dict] = field(default_factory=list)
# ── +4 clinical decision table ───────────────────────────────────────────────
# Maps the base at the +4 position to its road type, therapy class, analogy,
# animation key, candidate drugs and supporting evidence strings.
# The "T" entry is intentionally None: lookups that hit it are expected to
# fall back to the "U" rule (T is the DNA spelling of U).
PLUS4_RULES: dict[str, dict | None] = {
    "C": {
        "road_type": "Slippery",
        "therapy": "Small Molecule (Readthrough Agent)",
        "analogy": "Greasing the wheels — ribosome slides over the stop codon.",
        "animation_key": "scene_slippery",
        "drugs": [
            {"name": "Ataluren (PTC124)", "class": "Small Molecule",
             "status": "FDA Compassionate Use / EMA Approved (DMD)",
             "trial": "NCT00803205",
             "pubmed": "19940257"},
            {"name": "ELX-02", "class": "Small Molecule",
             "status": "Phase 2 Clinical Trial",
             "trial": "NCT04135495",
             "pubmed": "32737111"},
            {"name": "Gentamicin (analog)", "class": "Aminoglycoside",
             "status": "Research / Compassionate Use",
             "trial": "NCT00001693",
             "pubmed": "10767574"},
        ],
        "evidence": [
            "PMID:10767574 — Manuvakhova 2000: C at +4 highest suppression efficiency",
            "PMID:19940257 — Peltz 2009: Ataluren efficacy peaks at UGA+C context",
            "PMID:29036571 — Dabrowski 2018: +4C identified as primary slippery anchor",
        ],
    },
    "A": {
        "road_type": "Smooth",
        "therapy": "Small Molecule + NMD Inhibitor",
        "analogy": "Smoothing the road and stopping the cleanup crew (NMD pathway).",
        "animation_key": "scene_smooth",
        "drugs": [
            {"name": "Ataluren + NMDI14", "class": "Small Molecule + NMD Inhibitor",
             "status": "Preclinical Combination",
             "trial": "N/A",
             "pubmed": "24613397"},
            {"name": "Amlexanox", "class": "NMD Inhibitor",
             "status": "Research",
             "trial": "NCT02349984",
             "pubmed": "26077448"},
        ],
        "evidence": [
            "PMID:10767574 — Manuvakhova 2000: A at +4 moderate suppression, NMD active",
            "PMID:24613397 — Karijolich 2014: NMD inhibition required for A+4 context",
            "PMID:26077448 — Gonzalez-Hilarion 2015: Dual strategy for smooth-road PTCs",
        ],
    },
    "G": {
        "road_type": "Sticky",
        "therapy": "ACE-tRNA (Biologic / Suppressor tRNA)",
        "analogy": "Using a crane to lift the ribosome truck over a wall.",
        "animation_key": "scene_sticky",
        "drugs": [
            {"name": "ACE-tRNA (Tevard)", "class": "Engineered Suppressor tRNA",
             "status": "Phase 1/2 Clinical Trial",
             "trial": "NCT05514691",
             "pubmed": "35798700"},
            {"name": "sup-tRNA (ReCode)", "class": "Modified tRNA Biologic",
             "status": "IND Filed",
             "trial": "N/A",
             "pubmed": "34819669"},
        ],
        "evidence": [
            "PMID:10767574 — Manuvakhova 2000: G at +4 lowest small-molecule suppression",
            "PMID:35798700 — Lueck 2022: ACE-tRNA overcomes sticky-G context efficiently",
            "PMID:34819669 — Dolgin 2021: Suppressor tRNA biologics for G+4 PTCs",
        ],
    },
    "U": {
        "road_type": "Rough",
        "therapy": "Combination Therapy (Multi-modal)",
        "analogy": "Total road reconstruction — multiple tools working simultaneously.",
        "animation_key": "scene_rough",
        "drugs": [
            {"name": "Readthrough Agent + Gene Therapy", "class": "Combination",
             "status": "Case-by-case clinical decision",
             "trial": "N/A",
             "pubmed": "15381195"},
            {"name": "Antisense Oligonucleotide (ASO)", "class": "ASO",
             "status": "Disease-specific trials available",
             "trial": "NCT03160820",
             "pubmed": "28481358"},
            {"name": "Exon Skipping (if applicable)", "class": "RNA Therapy",
             "status": "Disease-specific",
             "trial": "NCT01396239",
             "pubmed": "23985596"},
        ],
        "evidence": [
            "PMID:15381195 — Bidou 2004: U at +4 requires multi-modal approach",
            "PMID:28481358 — Lim 2017: ASO combination for rough-context PTCs",
            "PMID:23985596 — Goemans 2013: Exon skipping for refractory PTCs",
        ],
    },
    "T": None,  # T resolved to U at runtime
}
class NucleotideClassifier:
    """
    Classifies the slipperiness of a 30bp PTC window and applies the
    +4 nucleotide clinical decision rule.

    The window is always 30 characters:
        Index 0–14  → positions -15 to -1  (upstream context)
        Index 15–17 → positions 0 +1 +2    (stop codon)
        Index 18    → position +3
        Index 19    → position +4  ← THE KEY POSITION
        Index 20–29 → positions +5 to +14  (downstream context)

    NOTE(review): with the stop codon at indices 15–17, the base immediately
    3′ of the stop codon is index 18, while PLUS4_INDEX is 19. The module
    docstring describes +4 as immediately 3′ of the stop codon — confirm
    which numbering is intended before changing either.
    """

    STOP_CODON_START = 15  # 0-based index in 30bp window
    PLUS4_INDEX = 19       # 0-based index in 30bp window

    def classify(self, window: str) -> NucleotideReport:
        """Score every position of *window* and apply the +4 decision rule.

        The window is upper-cased and every ``T`` is rewritten to ``U``
        before scoring, so the report always carries the RNA spelling.
        Characters missing from ``SLIP_SCORES`` score a neutral 0.50. A +4
        character with no entry in ``PLUS4_RULES`` falls back to the ``U``
        rule, while ``plus4_base`` still reports the actual character.

        Args:
            window: 30-character sequence centred on the stop codon.

        Returns:
            A ``NucleotideReport`` with the per-position breakdown, the
            weight-normalised window score (rounded to 4 places) and the
            therapy rule selected by the +4 base. The evidence and drug
            lists are copies, so mutating the report never alters
            ``PLUS4_RULES``.

        Raises:
            ValueError: if the window is not exactly 30 characters, before
                or after case normalisation.
        """
        if len(window) != 30:
            raise ValueError(f"Window must be exactly 30bp, got {len(window)}")

        window = window.upper().replace("T", "U")  # Normalize to RNA

        # str.upper() can lengthen some non-ASCII text (e.g. "ß" → "SS");
        # re-check so such input fails clearly instead of overrunning
        # POSITION_WEIGHTS with an IndexError.
        if len(window) != 30:
            raise ValueError(
                "Window must be exactly 30bp after case normalization, "
                f"got {len(window)}"
            )

        # ── Per-position scores ───────────────────────────────────────────
        position_scores = []
        weighted_sum = 0.0
        weight_total = 0.0
        for i, base in enumerate(window):
            rel_pos = i - self.STOP_CODON_START  # -15 … +14
            slip = SLIP_SCORES.get(base, 0.50)
            weight = POSITION_WEIGHTS[i]
            is_stop = self.STOP_CODON_START <= i <= self.STOP_CODON_START + 2
            is_plus4 = i == self.PLUS4_INDEX
            weighted_sum += slip * weight
            weight_total += weight
            position_scores.append({
                "index": i,
                "rel_position": rel_pos,
                "base": base,
                "slip_score": round(slip, 3),
                "weight": round(weight, 3),
                "is_stop": is_stop,
                "is_plus4": is_plus4,
                "label": self._label(slip),
            })
        window_rfc = weighted_sum / weight_total if weight_total else 0.50

        # ── +4 decision ───────────────────────────────────────────────────
        # T was already rewritten to U above, so no separate T handling is
        # needed; `or` covers both a missing key and the None placeholder.
        plus4_base = window[self.PLUS4_INDEX]
        rule = PLUS4_RULES.get(plus4_base) or PLUS4_RULES["U"]

        return NucleotideReport(
            window=window,
            position_scores=position_scores,
            plus4_base=plus4_base,
            plus4_road_type=rule["road_type"],
            plus4_therapy=rule["therapy"],
            plus4_analogy=rule["analogy"],
            plus4_animation_key=rule["animation_key"],
            window_rfc=round(window_rfc, 4),
            # Copy so callers cannot mutate the shared module-level table.
            evidence_citations=list(rule["evidence"]),
            drug_candidates=[dict(drug) for drug in rule["drugs"]],
        )

    @staticmethod
    def _label(slip: float) -> str:
        """Map a slipperiness score to its descriptive band.

        Thresholds are inclusive lower bounds: >= 0.75 "Slippery",
        >= 0.55 "Smooth", >= 0.30 "Moderate", otherwise "Solid/Sticky".
        """
        if slip >= 0.75:
            return "Slippery"
        if slip >= 0.55:
            return "Smooth"
        if slip >= 0.30:
            return "Moderate"
        return "Solid/Sticky"