Spaces:
Sleeping
Sleeping
| """ | |
| N2N Precision Engine — N2NBrain Orchestrator | |
| ============================================= | |
| Inventor : Manav Vanga | |
| Date : March 2, 2026 | |
| Executes the complete three-tier RP-Score pipeline: | |
| Tier 1 — Contextual Windowing (EnsemblSequenceExtractor) | |
| Tier 2 — Transformer Inference (DNABERT2Inferencer + RFC + NucleotideClassifier) | |
| Tier 3 — Clinical Veto + Evidence (ClinVarValidator + DrugRecommender) | |
| Every call to brain.score() returns a CertifiedReport containing: | |
| • RP-Score (0-100) | |
| • +4 nucleotide identity and road type | |
| • Therapy recommendation with specific drugs | |
| • Full evidence bundle (ClinVar + PubMed + Trials + Biobank) | |
| • SHA-256 audit hash for regulatory trail | |
| • Animation scene key for 3D visualisation | |
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import json | |
| import logging | |
| from dataclasses import dataclass, field, asdict | |
| from datetime import datetime, timezone | |
| from typing import Optional | |
| from .sequence_extractor import EnsemblSequenceExtractor, PTCVariant | |
| from .dnabert_inferencer import DNABERT2Inferencer | |
| from .nucleotide_classifier import NucleotideClassifier, NucleotideReport | |
| from .clinvar_validator import ClinVarValidator | |
| from .drug_recommender import DrugRecommender, TherapyRecommendation | |
# Module-level logger, registered under the fixed name "N2NBrain".
log = logging.getLogger("N2NBrain")
@dataclass
class CertifiedReport:
    """The complete RP-Score certified output.

    This is the object returned to the app/website frontend.

    The class is a dataclass: its annotated fields become keyword
    constructor parameters, which is how ``N2NBrain.score`` builds it.
    Every field except ``engine_version`` is required.
    """

    # Identity
    variant: PTCVariant
    timestamp_utc: str

    # Tier 1
    window_sequence: str  # 30bp extracted context
    window_display: str  # Formatted with position labels

    # Tier 2
    rfc_raw: float  # Ribosomal Friction Coefficient (0-1)
    molecular_score: float  # Pre-ClinVar RP-Score (0-100)
    nucleotide_report: NucleotideReport  # Full per-position breakdown

    # +4 Decision
    plus4_base: str  # C / A / G / U
    plus4_road_type: str  # Slippery / Smooth / Sticky / Rough
    plus4_analogy: str  # Plain-English explanation
    animation_scene: str  # Key for 3D engine

    # Tier 3
    clinvar_modifier: float
    clinvar_variants: int
    rp_score: float  # Final certified score (0-100)
    confidence_interval: tuple[float, float]
    certification_tier: str  # Dual-Certified / Molecular-Only / ClinVar-Flagged

    # Therapy
    therapy: TherapyRecommendation

    # Evidence
    evidence_links: list[str]  # All ClinVar accession URLs
    pubmed_citations: list[dict]
    clinical_trials: list[dict]
    biobank_note: str
    fda_status: str

    # Audit
    audit_hash: str  # SHA-256 of all inputs
    engine_version: str = "1.0.0"
class N2NBrain:
    """Main orchestrator: call ``brain.score(variant)`` to get a CertifiedReport.

    Holds one instance of each pipeline component (sequence extractor,
    DNABERT-2 inferencer, nucleotide classifier, ClinVar validator and
    drug recommender) and runs them in order inside ``score``.
    """

    VERSION = "1.0.0"

    def __init__(
        self,
        rfc_weights_path: Optional[str] = None,
        ncbi_api_key: Optional[str] = None,
        device: str = "auto",
    ):
        """Construct every pipeline component.

        Args:
            rfc_weights_path: Passed as the first positional argument to
                ``DNABERT2Inferencer``.
            ncbi_api_key: Passed to ``ClinVarValidator``.
            device: Passed as the second positional argument to
                ``DNABERT2Inferencer``.
        """
        log.info("Initialising N2N Precision Engine v%s", self.VERSION)
        self.extractor = EnsemblSequenceExtractor()
        self.inferencer = DNABERT2Inferencer(rfc_weights_path, device)
        self.classifier = NucleotideClassifier()
        self.validator = ClinVarValidator(ncbi_api_key)
        self.recommender = DrugRecommender()

    @staticmethod
    def _confidence_interval(score: float, n_evidence: int) -> tuple[float, float]:
        """Return the (low, high) interval around *score*, clamped to 0-100."""
        # Half-width starts at 20 and shrinks by 0.5 per reviewed variant,
        # never going below 5.
        half_width = max(5.0, 20.0 - n_evidence * 0.5)
        return (round(max(0.0, score - half_width), 2),
                round(min(100.0, score + half_width), 2))

    @staticmethod
    def _format_window_display(window: str, plus4: str) -> str:
        """Render *window* with the stop slice delimited and the +4 base labelled."""
        stop = window[15:18]
        # The +4 label uses the classifier's base instead of re-indexing the
        # window. The previous window[19] lookup is not the base adjacent to
        # the window[15:18] stop slice, could disagree with the certified
        # plus4_base, and raised IndexError on a window shorter than 20.
        # NOTE(review): the marker glyphs on the second line are reproduced
        # exactly as they appear in this copy of the file; they look
        # mis-encoded (probably arrows) -- confirm against the original.
        return (f"5'- {window[:15]} | {stop} | {window[18:]} -3'\n"
                f" β upstream -15 β β β downstream +15 β\n"
                f" STOP (+4={plus4})")

    def score(self, variant: PTCVariant) -> CertifiedReport:
        """Run the three-tier pipeline for *variant* and assemble the report.

        Steps, in order:
          1. Fetch the sequence window for ``variant.chrom`` / ``variant.pos``.
          2. Run inference on the window; the molecular score is the
             returned ``"rfc_score"`` times 100, rounded to 2 decimals.
             Classify the window to obtain the +4 base and related fields.
          3. Query ClinVar; the final score is
             ``molecular_score * (1 + modifier)`` clamped to 0-100.
          4. Ask the recommender for a therapy using the final score.
          5. Hash the inputs (SHA-256 over key-sorted JSON) for the audit
             trail and build the ``CertifiedReport``.

        Args:
            variant: The variant to score. It is passed to
                ``dataclasses.asdict`` for the audit hash, so it must be a
                dataclass instance.

        Returns:
            A ``CertifiedReport`` stamped with the current UTC time.

        Raises:
            Whatever the pipeline components raise is propagated unchanged;
            a ``KeyError`` results if an inference or ClinVar result lacks a
            key read here.
        """
        log.info("Scoring %s:%s %s>%s (%s)",
                 variant.chrom, variant.pos, variant.ref, variant.alt,
                 variant.gene_symbol)

        # -- Tier 1: extract the sequence window around the variant --
        window = self.extractor.fetch_window(variant.chrom, variant.pos)

        # -- Tier 2: DNABERT-2 inference --
        inference = self.inferencer.infer(window)
        rfc_raw = inference["rfc_score"]
        mol_score = round(rfc_raw * 100.0, 2)

        # Nucleotide-level classification (+4 rule)
        nuc_report = self.classifier.classify(window)
        plus4 = nuc_report.plus4_base

        # -- Tier 3: ClinVar clinical veto --
        clinvar = self.validator.query(
            variant.gene_symbol, variant.chrom, variant.pos)
        modifier = clinvar["modifier"]
        corrected = float(max(0.0, min(100.0, mol_score * (1.0 + modifier))))
        n_ev = clinvar["variants_reviewed"]
        ci = self._confidence_interval(corrected, n_ev)

        # -- Drug recommendation --
        therapy = self.recommender.recommend(
            gene=variant.gene_symbol,
            rp_score=corrected,
            plus4_base=plus4,
            clinvar_links=clinvar["evidence_links"],
        )

        # -- Window display string --
        disp = self._format_window_display(window, plus4)

        # -- Audit hash --
        # Uses the unrounded rfc_raw / modifier / score values; sort_keys
        # keeps the serialisation (and therefore the digest) stable.
        audit_src = json.dumps({
            "variant": asdict(variant), "window": window,
            "rfc_raw": rfc_raw, "modifier": modifier,
            "plus4": plus4, "rp_score": corrected,
        }, sort_keys=True)
        audit_hash = hashlib.sha256(audit_src.encode()).hexdigest()

        # -- Assemble all evidence --
        # Therapy citations plus the first two classifier citations; per the
        # original note, duplicates are removed at render time, not here.
        all_pubmed = (therapy.evidence.pubmed_citations +
                      nuc_report.evidence_citations[:2])

        return CertifiedReport(
            variant=variant,
            timestamp_utc=datetime.now(timezone.utc).isoformat(),
            window_sequence=window,
            window_display=disp,
            rfc_raw=round(rfc_raw, 4),
            molecular_score=mol_score,
            nucleotide_report=nuc_report,
            plus4_base=plus4,
            plus4_road_type=nuc_report.plus4_road_type,
            plus4_analogy=nuc_report.plus4_analogy,
            animation_scene=nuc_report.plus4_animation_key,
            clinvar_modifier=round(modifier, 4),
            clinvar_variants=n_ev,
            rp_score=round(corrected, 2),
            confidence_interval=ci,
            certification_tier=clinvar["tier"],
            therapy=therapy,
            evidence_links=clinvar["evidence_links"],
            pubmed_citations=all_pubmed,
            clinical_trials=therapy.evidence.clinical_trials,
            biobank_note=therapy.evidence.biobank_note,
            fda_status=therapy.evidence.fda_status,
            audit_hash=audit_hash,
            engine_version=self.VERSION,
        )