N2N-Precision-Engine / core /n2n_brain.py
ManavVanga's picture
Add n2n_brain.py
0bc8d6b verified
"""
N2N Precision Engine — N2NBrain Orchestrator
=============================================
Inventor : Manav Vanga
Date : March 2, 2026
Executes the complete three-tier RP-Score pipeline:
Tier 1 → Contextual Windowing (EnsemblSequenceExtractor)
Tier 2 → Transformer Inference (DNABERT2Inferencer + RFC + NucleotideClassifier)
Tier 3 → Clinical Veto + Evidence (ClinVarValidator + DrugRecommender)
Every call to brain.score() returns a CertifiedReport containing:
• RP-Score (0-100)
• +4 nucleotide identity and road type
• Therapy recommendation with specific drugs
• Full evidence bundle (ClinVar + PubMed + Trials + Biobank)
• SHA-256 audit hash for regulatory trail
• Animation scene key for 3D visualisation
"""
from __future__ import annotations
import hashlib
import json
import logging
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from typing import Optional
from .sequence_extractor import EnsemblSequenceExtractor, PTCVariant
from .dnabert_inferencer import DNABERT2Inferencer
from .nucleotide_classifier import NucleotideClassifier, NucleotideReport
from .clinvar_validator import ClinVarValidator
from .drug_recommender import DrugRecommender, TherapyRecommendation
log = logging.getLogger("N2NBrain")
@dataclass
class CertifiedReport:
    """
    Certified RP-Score result handed back to the app/website frontend.

    Fields are grouped by the pipeline stage that produces them. Field
    order is part of the constructor signature, so it must not change.
    Only ``engine_version`` has a default; every other field is required.
    """

    # --- Identity -----------------------------------------------------
    variant: PTCVariant
    timestamp_utc: str

    # --- Tier 1 -------------------------------------------------------
    # window_sequence: the 30bp extracted context.
    # window_display:  the same window formatted with position labels.
    window_sequence: str
    window_display: str

    # --- Tier 2 -------------------------------------------------------
    # rfc_raw:           Ribosomal Friction Coefficient (0-1).
    # molecular_score:   pre-ClinVar RP-Score (0-100).
    # nucleotide_report: full per-position breakdown.
    rfc_raw: float
    molecular_score: float
    nucleotide_report: NucleotideReport

    # --- +4 decision --------------------------------------------------
    # plus4_base:      C / A / G / U.
    # plus4_road_type: Slippery / Smooth / Sticky / Rough.
    # plus4_analogy:   plain-English explanation.
    # animation_scene: key for the 3D engine.
    plus4_base: str
    plus4_road_type: str
    plus4_analogy: str
    animation_scene: str

    # --- Tier 3 -------------------------------------------------------
    # rp_score:           final certified score (0-100).
    # certification_tier: Dual-Certified / Molecular-Only / ClinVar-Flagged.
    clinvar_modifier: float
    clinvar_variants: int
    rp_score: float
    confidence_interval: tuple[float, float]
    certification_tier: str

    # --- Therapy ------------------------------------------------------
    therapy: TherapyRecommendation

    # --- Evidence -----------------------------------------------------
    # evidence_links: all ClinVar accession URLs.
    evidence_links: list[str]
    pubmed_citations: list[dict]
    clinical_trials: list[dict]
    biobank_note: str
    fda_status: str

    # --- Audit --------------------------------------------------------
    # audit_hash: SHA-256 of all inputs.
    audit_hash: str
    engine_version: str = "1.0.0"
class N2NBrain:
    """
    Main orchestrator — call brain.score(variant) to get a CertifiedReport.

    The constructor wires up the five collaborators used by ``score()``:
    sequence extractor, DNABERT-2 inferencer, nucleotide classifier,
    ClinVar validator and drug recommender.
    """

    VERSION = "1.0.0"

    def __init__(
        self,
        rfc_weights_path: Optional[str] = None,
        ncbi_api_key: Optional[str] = None,
        device: str = "auto",
    ):
        """
        Build the pipeline components.

        Args:
            rfc_weights_path: Forwarded to ``DNABERT2Inferencer``.
            ncbi_api_key: Forwarded to ``ClinVarValidator``.
            device: Forwarded to ``DNABERT2Inferencer``.
        """
        log.info("Initialising N2N Precision Engine v%s", self.VERSION)
        self.extractor = EnsemblSequenceExtractor()
        self.inferencer = DNABERT2Inferencer(rfc_weights_path, device)
        self.classifier = NucleotideClassifier()
        self.validator = ClinVarValidator(ncbi_api_key)
        self.recommender = DrugRecommender()

    def score(self, variant: PTCVariant) -> CertifiedReport:
        """
        Run the three-tier pipeline for one variant and assemble the report.

        Args:
            variant: The variant to score. Its ``chrom``, ``pos``, ``ref``,
                ``alt`` and ``gene_symbol`` attributes are read here.

        Returns:
            A ``CertifiedReport`` stamped with the current UTC time and
            this engine's ``VERSION``.

        Raises:
            IndexError: If the extracted window has fewer than 19
                characters, so no +4 base exists to display.
        """
        log.info("Scoring %s:%s %s>%s (%s)",
                 variant.chrom, variant.pos, variant.ref, variant.alt,
                 variant.gene_symbol)

        # -- Tier 1: extract the ±15bp window --------------------------
        window = self.extractor.fetch_window(variant.chrom, variant.pos)

        # -- Tier 2: DNABERT-2 inference -------------------------------
        inference = self.inferencer.infer(window)
        rfc_raw = inference["rfc_score"]
        mol_score = round(rfc_raw * 100.0, 2)

        # Nucleotide-level classification (+4 rule).
        nuc_report = self.classifier.classify(window)
        plus4 = nuc_report.plus4_base

        # -- Tier 3: ClinVar clinical veto -----------------------------
        clinvar = self.validator.query(
            variant.gene_symbol, variant.chrom, variant.pos
        )
        modifier = clinvar["modifier"]
        # The modifier scales the molecular score multiplicatively; the
        # result is clamped to the 0-100 score range.
        corrected = float(max(0.0, min(100.0, mol_score * (1.0 + modifier))))
        n_ev = clinvar["variants_reviewed"]
        ci = self._confidence_interval(corrected, n_ev)

        # -- Drug recommendation ---------------------------------------
        therapy = self.recommender.recommend(
            gene=variant.gene_symbol,
            rp_score=corrected,
            plus4_base=plus4,
            clinvar_links=clinvar["evidence_links"],
        )

        # -- Window display string -------------------------------------
        disp = self._format_window_display(window)

        # -- Audit hash ------------------------------------------------
        # Hashes the unrounded values, so the hash input differs from the
        # rounded figures stored on the report.
        audit_hash = self._audit_hash(
            variant, window, rfc_raw, modifier, plus4, corrected
        )

        # -- Assemble all evidence -------------------------------------
        all_pubmed = (therapy.evidence.pubmed_citations +
                      nuc_report.evidence_citations[:2])  # deduplicated at render

        return CertifiedReport(
            variant=variant,
            timestamp_utc=datetime.now(timezone.utc).isoformat(),
            window_sequence=window,
            window_display=disp,
            rfc_raw=round(rfc_raw, 4),
            molecular_score=mol_score,
            nucleotide_report=nuc_report,
            plus4_base=plus4,
            plus4_road_type=nuc_report.plus4_road_type,
            plus4_analogy=nuc_report.plus4_analogy,
            animation_scene=nuc_report.plus4_animation_key,
            clinvar_modifier=round(modifier, 4),
            clinvar_variants=n_ev,
            rp_score=round(corrected, 2),
            confidence_interval=ci,
            certification_tier=clinvar["tier"],
            therapy=therapy,
            evidence_links=clinvar["evidence_links"],
            pubmed_citations=all_pubmed,
            clinical_trials=therapy.evidence.clinical_trials,
            biobank_note=therapy.evidence.biobank_note,
            fda_status=therapy.evidence.fda_status,
            audit_hash=audit_hash,
            engine_version=self.VERSION,
        )

    @staticmethod
    def _confidence_interval(score: float, n_evidence: int) -> tuple[float, float]:
        """Return (low, high) bounds around *score*, clamped to 0-100.

        The half-width starts at 20 points and shrinks by 0.5 per reviewed
        variant, but never drops below 5 points.
        """
        half_width = max(5.0, 20.0 - n_evidence * 0.5)
        low = round(max(0.0, score - half_width), 2)
        high = round(min(100.0, score + half_width), 2)
        return (low, high)

    @staticmethod
    def _format_window_display(window: str) -> str:
        """Render *window* as upstream | stop codon | downstream with labels.

        Indices 15-17 are shown as the stop codon (+1..+3), so the +4 base
        is the first downstream character, ``window[18]``.
        """
        upstream = window[:15]
        stop = window[15:18]
        downstream = window[18:]
        # +4 is the base immediately 3' of the stop codon: index 18, not 19
        # (index 19 would be +5).
        plus4_base = window[18]
        return (f"5'- {upstream} | {stop} | {downstream} -3'\n"
                " ← upstream -15 → ↑ ← downstream +15 →\n"
                f" STOP (+4={plus4_base})")

    @staticmethod
    def _audit_hash(variant, window, rfc_raw, modifier, plus4, rp_score) -> str:
        """Return the SHA-256 hex digest of the scoring inputs.

        The inputs are serialised as JSON with sorted keys so the digest
        does not depend on dict ordering.
        """
        audit_src = json.dumps({
            "variant": asdict(variant),
            "window": window,
            "rfc_raw": rfc_raw,
            "modifier": modifier,
            "plus4": plus4,
            "rp_score": rp_score,
        }, sort_keys=True)
        return hashlib.sha256(audit_src.encode()).hexdigest()