Spaces:
Sleeping
Sleeping
| """ | |
N2N Precision Engine — Production API v3.0
| Inventor: Manav Vanga | Patent Pending 2026 | |
| Brain: DNABERT-2 v2 (Pearson r=0.941, trained on 30,387 biological variants) | |
| Calibrated thresholds: HIGH=0.88, MED=0.76 | |
| Includes: Full drug database + ClinicalTrials.gov live integration | |
| """ | |
| import os, re, hashlib, threading | |
| from datetime import datetime, timezone | |
| import numpy as np | |
| import requests | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| app = Flask(__name__) | |
| CORS(app) | |
# ── Inventor constants ────────────────────────────────────────────
# Per-base score looked up by compute_rp_score_rfc() and encode_window().
# 'T' and 'U' share a value so DNA and RNA spellings score the same.
SLIP_SCORES = {
    "C": 0.82,
    "A": 0.61,
    "T": 0.34,
    "U": 0.34,
    "G": 0.19,
    "N": 0.50,
}

# Per-position weights applied to the padded window.
# NOTE(review): this list has 29 entries while windows are padded/truncated
# to 30 bases, so zip() in the scoring helpers ignores the last base.
# Confirm whether a weight is missing.
POSITION_WEIGHTS = [
    # indices 0-14
    0.20, 0.22, 0.24, 0.26, 0.28,
    0.32, 0.36, 0.42, 0.50, 0.58,
    0.65, 0.72, 0.80, 0.88, 0.95,
    # indices 15-17 (the slice encode_window() reads as the stop codon)
    1.00, 1.00, 1.00,
    # index 18 (the base encode_window() and score() read as "+4")
    1.80,
    # indices 19-28
    1.40, 1.20, 1.00, 0.85, 0.72,
    0.60, 0.50, 0.42, 0.36, 0.28,
]

# ── Calibrated thresholds (from validation on 10 known variants) ──
HIGH_THRESHOLD = 0.88
MED_THRESHOLD = 0.76

# Maps the +4 base to a (label, description) pair returned by the score API.
# NOTE(review): the 'β' inside the descriptions looks like a mis-encoded dash;
# left untouched because these strings are emitted at runtime.
PLUS4_ROAD = {
    "C": ("Slippery", "High readthrough β ribosome slides through stop codon"),
    "A": ("Smooth", "Moderate readthrough β some ribosomal slippage"),
    "T": ("Rough", "Low readthrough β ribosome mostly terminates"),
    "U": ("Rough", "Low readthrough β ribosome mostly terminates"),
    "G": ("Sticky", "Very low readthrough β ribosome terminates strongly"),
}
# ── Complete drug database ────────────────────────────────────────
# Static therapy catalogue keyed by the tier names get_tier() returns.
# Every tier carries the same seven keys; score() copies them into its
# JSON response unchanged.
# NOTE(review): the 'β' characters inside several strings look like
# mis-encoded dashes; they are preserved because they are runtime output.
DRUG_DATABASE = {
    "HIGH": {
        "therapy": "Readthrough Therapy β Strong Candidate",
        "mechanism": "Promote ribosomal readthrough of premature stop codon",
        "approved": [
            {
                "name": "Ataluren (PTC124)",
                "status": "EMA Approved (EU) β FDA Breakthrough Therapy",
                "diseases": ["Duchenne MD", "Cystic Fibrosis"],
                "dose": "10/10/20 mg/kg three times daily",
                "note": "First-in-class readthrough drug",
            },
        ],
        "phase3": [
            {
                "name": "ELX-02 (Eloxx)",
                "status": "Phase 3 Clinical Trial",
                "diseases": ["Cystic Fibrosis", "Dravet Syndrome"],
                "mechanism": "Eukaryotic ribosome-targeting aminoglycoside",
                "note": "More selective than gentamicin, less nephrotoxic",
            },
        ],
        "phase2": [
            {
                "name": "SRI-37240 + SRI-41315",
                "status": "Phase 2",
                "diseases": ["Cystic Fibrosis"],
                "mechanism": "Novel readthrough compound class",
                "note": "University of Alabama Birmingham",
            },
            {
                "name": "Gentamicin (G418)",
                "status": "Phase 2 / Off-label",
                "diseases": ["Multiple β aminoglycoside readthrough"],
                "mechanism": "Aminoglycoside-induced misreading of stop codon",
                "note": "Nephrotoxicity limits long-term use",
            },
        ],
        "preclinical": [
            "Negamycin derivatives",
            "NV848 (Nonsense Therapeutics)",
            "Escin β natural readthrough compound",
            "Tylosin β macrolide with readthrough activity",
        ],
        "combination": [
            "Ataluren + NMD inhibitor (amlexanox)",
            "ELX-02 + CFTR corrector (lumacaftor)",
            "Readthrough + proteasome inhibitor",
        ],
    },
    "MEDIUM": {
        "therapy": "Combination Approach β Moderate Candidate",
        "mechanism": "Combine readthrough with NMD suppression",
        "approved": [
            {
                "name": "Gentamicin",
                "status": "Off-label / Investigational",
                "diseases": ["Multiple"],
                "note": "Short-term use, monitor kidneys",
            },
        ],
        "phase3": [
            {
                "name": "ELX-02",
                "status": "Phase 3 β may benefit moderate responders",
                "diseases": ["CF", "Dravet"],
                "note": "Trial enrollment open",
            },
        ],
        "phase2": [
            {
                "name": "Amlexanox + Readthrough",
                "status": "Phase 2 combination",
                "diseases": ["Multiple NMD diseases"],
                "mechanism": "NMD inhibition prolongs readthrough mRNA",
                "note": "Increases mRNA half-life for readthrough product",
            },
        ],
        "preclinical": [
            "SMG1 kinase inhibitors",
            "NMDI-14",
            "UPF1 inhibitors",
        ],
        "combination": [
            "Readthrough + NMD inhibitor",
            "Low-dose gentamicin + antioxidant",
        ],
    },
    "LOW": {
        "therapy": "Alternative Strategy β Poor Readthrough Candidate",
        "mechanism": "Bypass or compensate for the nonsense mutation",
        "approved": [
            {
                "name": "Eteplirsen (Exondys 51)",
                "status": "FDA Approved",
                "diseases": ["Duchenne MD β exon 51 skipping"],
                "note": "Exon skipping β bypasses mutation entirely",
            },
            {
                "name": "Nusinersen (Spinraza)",
                "status": "FDA Approved",
                "diseases": ["Spinal Muscular Atrophy"],
                "note": "Antisense oligonucleotide β splicing modulation",
            },
            {
                "name": "Onasemnogene (Zolgensma)",
                "status": "FDA Approved",
                "diseases": ["SMA type 1"],
                "note": "Gene replacement therapy",
            },
        ],
        # NOTE(review): this entry's status text says "FDA Approved" although
        # it is filed under 'phase3' — confirm the intended bucket.
        "phase3": [
            {
                "name": "Casimersen (Amondys 45)",
                "status": "FDA Approved β exon 45 skipping",
                "diseases": ["Duchenne MD"],
                "note": "Exon skipping strategy",
            },
        ],
        "phase2": [
            {
                "name": "Gene therapy vectors",
                "status": "Multiple Phase 1/2 trials",
                "diseases": ["Disease-specific"],
                "note": "AAV-delivered corrected gene copy",
            },
        ],
        "preclinical": [
            "Base editing (adenine base editor)",
            "Prime editing",
            "CRISPR-Cas9 correction",
            "Codon suppressor tRNA therapy",
        ],
        "combination": [
            "Exon skipping + supportive care",
            "Gene therapy + enzyme replacement",
        ],
    },
}
# ── ClinicalTrials.gov integration ────────────────────────────────
# Lower-case search keywords for readthrough-related therapies.
# NOTE(review): nothing in the visible source reads this list — confirm
# whether it is still needed.
READTHROUGH_DRUGS = [
    "ataluren",
    "ptc124",
    "elx-02",
    "gentamicin",
    "eloxx",
    "readthrough",
    "nonsense mutation",
    "premature stop codon",
]
def fetch_clinical_trials(gene=None, condition=None, max_trials=5):
    """
    Fetch live clinical trials from ClinicalTrials.gov API v2.
    Free, no API key needed.

    Args:
        gene: Optional gene symbol prepended to the free-text search.
        condition: Optional condition/disease text, sent as the API's
            ``query.cond`` parameter when non-empty.
        max_trials: Page size requested from the API.

    Returns:
        A list of dicts with keys ``nct_id``, ``title``, ``phase``,
        ``status``, ``conditions``, ``interventions``, ``locations`` and
        ``url``. Best-effort: any failure (network error, non-200 status,
        unparsable body) yields an empty list instead of raising.
    """
    try:
        # Free-text search: optional gene plus the fixed readthrough phrase.
        terms = [gene] if gene else []
        terms.append('nonsense mutation readthrough')
        url = "https://clinicaltrials.gov/api/v2/studies"
        params = {
            'query.term': ' '.join(terms),
            'filter.overallStatus': 'RECRUITING,ACTIVE_NOT_RECRUITING,ENROLLING_BY_INVITATION',
            'pageSize': max_trials,
            'format': 'json',
            'fields': 'NCTId,BriefTitle,Phase,OverallStatus,Condition,InterventionName,LocationCity,LocationCountry,StartDate,PrimaryCompletionDate'
        }
        if condition:
            # Previously accepted but never sent to the API.
            params['query.cond'] = condition
        resp = requests.get(url, params=params, timeout=10)
        if resp.status_code != 200:
            return []
        trials = []
        for study in resp.json().get('studies', []):
            proto = study.get('protocolSection', {})
            ident = proto.get('identificationModule', {})
            status = proto.get('statusModule', {})
            design = proto.get('designModule', {})
            interv = proto.get('armsInterventionsModule', {})
            locs = proto.get('contactsLocationsModule', {})
            interventions = [
                item.get('name', '') for item in interv.get('interventions', [])
            ]
            conditions = proto.get('conditionsModule', {}).get('conditions', [])
            locations = []
            for loc in locs.get('locations', [])[:3]:
                # Join only the parts that are present so a missing city or
                # country does not leave a dangling ", ".
                parts = [p for p in (loc.get('city', ''), loc.get('country', '')) if p]
                if parts:
                    locations.append(', '.join(parts))
            # API v2 reports phases as a list under designModule; the status
            # module carries no phase, so reading it there always gave 'N/A'.
            phases = design.get('phases') or []
            nct_id = ident.get('nctId', '')
            trials.append({
                'nct_id': nct_id,
                'title': ident.get('briefTitle', ''),
                'phase': '/'.join(phases) if phases else 'N/A',
                'status': status.get('overallStatus', ''),
                'conditions': conditions[:3],
                'interventions': interventions[:3],
                'locations': locations[:3],
                'url': 'https://clinicaltrials.gov/study/' + nct_id,
            })
        return trials
    except Exception as e:
        # Deliberate best-effort boundary: trial lookup must never break the
        # caller, but the failure is surfaced instead of vanishing silently.
        print("ClinicalTrials.gov lookup failed: " + str(e))
        return []
def fetch_drug_trials(drug_name, max_trials=3):
    """Fetch trials for a specific drug.

    Searches ClinicalTrials.gov API v2 for ``drug_name`` combined with the
    phrase "nonsense mutation", restricted to recruiting / active studies.

    Args:
        drug_name: Drug name used as the leading search term.
        max_trials: Page size requested from the API.

    Returns:
        A list of dicts with keys ``nct_id``, ``title`` (truncated to 80
        characters), ``phase``, ``status`` and ``url``. Best-effort: any
        failure yields an empty list instead of raising.
    """
    try:
        url = "https://clinicaltrials.gov/api/v2/studies"
        params = {
            'query.term': drug_name + ' nonsense mutation',
            'filter.overallStatus': 'RECRUITING,ACTIVE_NOT_RECRUITING',
            'pageSize': max_trials,
            'format': 'json',
            'fields': 'NCTId,BriefTitle,Phase,OverallStatus,LocationCountry'
        }
        resp = requests.get(url, params=params, timeout=8)
        if resp.status_code != 200:
            return []
        results = []
        for study in resp.json().get('studies', []):
            proto = study.get('protocolSection', {})
            ident = proto.get('identificationModule', {})
            status = proto.get('statusModule', {})
            # API v2 reports phases as a list under designModule; the status
            # module carries no phase, so the old lookup was always empty.
            phases = proto.get('designModule', {}).get('phases') or []
            nct_id = ident.get('nctId', '')
            results.append({
                'nct_id': nct_id,
                'title': ident.get('briefTitle', '')[:80],
                'phase': '/'.join(phases),
                'status': status.get('overallStatus', ''),
                'url': 'https://clinicaltrials.gov/study/' + nct_id,
            })
        return results
    except Exception as e:
        # Best-effort boundary; a bare `except:` would also have swallowed
        # KeyboardInterrupt/SystemExit and hidden the cause.
        print("ClinicalTrials.gov drug lookup failed: " + str(e))
        return []
# ── Helper functions ──────────────────────────────────────────────
def compute_rp_score_rfc(window):
    """Return the rule-based score for ``window`` on a 0-100 scale.

    The window is upper-cased, T is rewritten to U, and the result is
    right-padded with 'N' and cut to 30 bases. Each base's SLIP_SCORES value
    (0.5 for unknown symbols) is multiplied by the matching POSITION_WEIGHTS
    entry; the weighted sum is normalised by the total weight, scaled to a
    percentage, clamped to [0, 100] and rounded to two decimals.
    """
    bases = (window.upper().replace('T', 'U') + 'N' * 30)[:30]
    # zip() stops at the shorter input, so any base beyond the end of
    # POSITION_WEIGHTS does not contribute.
    weighted_sum = sum(
        SLIP_SCORES.get(base, 0.5) * weight
        for base, weight in zip(bases, POSITION_WEIGHTS)
    )
    percent = weighted_sum / sum(POSITION_WEIGHTS) * 100
    clamped = max(0.0, min(100.0, percent))
    return round(clamped, 2)
def get_tier(score):
    """Map a score to 'HIGH', 'MEDIUM' or 'LOW'.

    Cut-offs are checked from highest to lowest; the first one the score
    meets or exceeds wins, and anything below MED_THRESHOLD is 'LOW'.
    """
    for label, cutoff in (('HIGH', HIGH_THRESHOLD), ('MEDIUM', MED_THRESHOLD)):
        if score >= cutoff:
            return label
    return 'LOW'
def encode_window(window):
    """Encode ``window`` as the float32 feature vector fed to the RFC model.

    The window is normalised to 30 upper-case RNA bases (T→U, right-padded
    with 'N'). The returned vector is the concatenation of:
      * 30 per-base SLIP_SCORES values,
      * a 4-way one-hot of base index 18 over C/A/G/U,
      * a 3-way one-hot of bases 15-17 over UGA/UAA/UAG,
      * seven scalars: weighted mean score, mean score of bases 18-23,
        mean score of bases 10-14, G/C fraction, a constant 0.5, and the
        Shannon entropy of bases 18+ and of bases 0-14.
    """
    import math
    from collections import Counter

    def shannon(seq):
        # Entropy in bits of the symbol distribution; empty input gives 0.
        if not seq:
            return 0.0
        counts = Counter(seq)
        size = len(seq)
        return -sum((k/size)*math.log2(k/size) for k in counts.values() if k > 0)

    seq30 = (window.upper().replace('T', 'U') + 'N' * 30)[:30]
    per_base = [SLIP_SCORES.get(base, 0.50) for base in seq30]
    weighted_mean = (
        sum(val * wt for val, wt in zip(per_base, POSITION_WEIGHTS))
        / sum(POSITION_WEIGHTS)
    )

    plus4 = seq30[18]
    plus4_onehot = [int(plus4 == base) for base in ['C', 'A', 'G', 'U']]
    stop_codon = seq30[15:18]
    stop_onehot = [int(stop_codon == codon) for codon in ['UGA', 'UAA', 'UAG']]

    downstream_mean = sum(SLIP_SCORES.get(base, 0.5) for base in seq30[18:24]) / 6
    upstream_mean = sum(SLIP_SCORES.get(base, 0.5) for base in seq30[10:15]) / 5
    gc_fraction = sum(1 for base in seq30 if base in 'GC') / 30.0

    scalars = [
        weighted_mean,
        downstream_mean,
        upstream_mean,
        gc_fraction,
        0.5,  # fixed placeholder feature carried over unchanged
        shannon(seq30[18:]),
        shannon(seq30[:15]),
    ]
    return np.array(per_base + plus4_onehot + stop_onehot + scalars,
                    dtype=np.float32)
# ── Load brains ───────────────────────────────────────────────────
# Start on the rule-based scorer; BRAIN_TYPE is upgraded when a model loads.
BRAIN_TYPE = "RFC-Rule"
rfc_model = dnabert_model = dnabert_tok = None

# Optional RFC-ML head. A missing joblib package or weights file is not
# fatal: the failure is printed and the service keeps the rule-based scorer.
# NOTE(review): joblib files are pickle-based — only load trusted artifacts.
try:
    import joblib
    rfc_model = joblib.load("models/rfc_head_weights.pkl")
    BRAIN_TYPE = "RFC-ML"
    print("RFC-ML brain loaded")
except Exception as err:
    print(f"RFC-ML not found: {err}")
def load_dnabert():
    """Load the DNABERT-2 encoder plus the regression head into module globals.

    On success sets ``dnabert_model``, ``dnabert_tok`` and switches
    ``BRAIN_TYPE`` to "DNABERT-2". If the fine-tuned weights file is missing,
    or anything raises, a message is printed and the globals stay untouched,
    so predict() keeps using its fallbacks.
    """
    global dnabert_model, dnabert_tok, BRAIN_TYPE
    try:
        # Heavy imports are local so the module still imports without them.
        import torch
        import torch.nn as nn
        from transformers import AutoTokenizer, BertModel, BertConfig
        from huggingface_hub import snapshot_download
        print("Loading DNABERT-2 brain...")
        mp = snapshot_download("zhihan1996/DNABERT-2-117M")
        # NOTE(review): trust_remote_code=True lets the hub repo run its own
        # Python code — confirm this repo/revision is trusted.
        tok = AutoTokenizer.from_pretrained(mp, trust_remote_code=True)
        cfg = BertConfig.from_pretrained(mp)
        db = BertModel.from_pretrained(mp, config=cfg, ignore_mismatched_sizes=True)
        class RPScoreHead(nn.Module):
            """MLP head: h -> 512 -> 256 -> 128 -> 32 -> 1, sigmoid, times 100."""
            def __init__(self, h=768):
                super().__init__()
                self.net = nn.Sequential(
                    nn.Linear(h,512), nn.LayerNorm(512), nn.GELU(), nn.Dropout(0.15),
                    nn.Linear(512,256), nn.LayerNorm(256), nn.GELU(), nn.Dropout(0.10),
                    nn.Linear(256,128), nn.GELU(), nn.Dropout(0.05),
                    nn.Linear(128,32), nn.GELU(),
                    nn.Linear(32,1), nn.Sigmoid()
                )
            # Sigmoid output is scaled by 100, so the head emits values in 0-100.
            def forward(self, x): return self.net(x).squeeze(-1) * 100.0
        class N2NModel(nn.Module):
            """Encoder followed by RPScoreHead applied to the first token."""
            def __init__(self, db):
                super().__init__()
                self.encoder = db
                self.head = RPScoreHead()
            def forward(self, ids, mask):
                out = self.encoder(input_ids=ids, attention_mask=mask)
                # Index 0 along the sequence axis is fed to the head.
                return self.head(out.last_hidden_state[:,0,:])
        m = N2NModel(db)
        w = "models/n2n_dnabert2_v2.pt"
        if os.path.exists(w):
            import torch
            # NOTE(review): torch.load unpickles the checkpoint — only load
            # trusted files. The checkpoint must hold 'model_state_dict'.
            ck = torch.load(w, map_location='cpu')
            m.load_state_dict(ck['model_state_dict'])
            m.eval()
            # Model is published before the tokenizer; predict() checks both.
            dnabert_model = m
            dnabert_tok = tok
            BRAIN_TYPE = "DNABERT-2"
            print("DNABERT-2 v2 loaded. Pearson r=0.941")
        else:
            print("v2 weights not found")
    except Exception as e:
        print("DNABERT-2 failed: " + str(e))
# Load in a daemon thread at import time so startup is not blocked.
threading.Thread(target=load_dnabert, daemon=True).start()
def predict(window):
    """Score ``window`` with the best available brain.

    Tries, in order: the DNABERT-2 model (when both model and tokenizer are
    loaded), the RFC-ML model (when loaded), then the rule-based score.
    A failure in one stage is printed and the next stage is tried.

    Returns:
        ``(score, brain_name)`` where ``brain_name`` is "DNABERT-2",
        "RFC-ML" or "RFC-Rule" and ``score`` is rounded to three decimals.
    """
    if dnabert_model is not None and dnabert_tok is not None:
        try:
            import torch
            enc = dnabert_tok(window, return_tensors='pt',
                              max_length=36, padding='max_length', truncation=True)
            with torch.no_grad():
                s = dnabert_model(enc['input_ids'], enc['attention_mask']).item()
            # NOTE(review): the head built in load_dnabert() multiplies its
            # sigmoid output by 100, whereas both fallbacks below divide by
            # 100 before returning. Confirm which scale get_tier()'s
            # thresholds expect; behaviour is intentionally left unchanged.
            return round(s, 3), "DNABERT-2"
        except Exception as e:
            # Was a bare `except: pass`, which hid the cause and also
            # swallowed KeyboardInterrupt/SystemExit.
            print("DNABERT-2 inference failed, falling back: " + str(e))
    if rfc_model is not None:
        try:
            s = float(rfc_model.predict(encode_window(window).reshape(1,-1))[0])
            # Clamp to 0-100, then rescale to 0-1.
            return round(max(0,min(100,s))/100, 3), "RFC-ML"
        except Exception as e:
            print("RFC-ML inference failed, falling back: " + str(e))
    return round(compute_rp_score_rfc(window)/100, 3), "RFC-Rule"
# ── Routes ────────────────────────────────────────────────────────
# The view was never registered with Flask, so it could not be reached.
# NOTE(review): '/' is assumed for the landing route — confirm the path.
@app.route('/')
def home():
    """Return service metadata: name, version, active brain, thresholds, endpoints."""
    return jsonify({
        'name': 'N2N Precision Engine',
        'version': '3.0',
        'brain': BRAIN_TYPE,
        'inventor': 'Manav Vanga',
        'patent': 'Pending 2026',
        'description': 'Predicts readthrough therapy response for all nonsense mutation diseases',
        'calibration': {'high_threshold': HIGH_THRESHOLD, 'med_threshold': MED_THRESHOLD},
        'endpoints': ['/api/health', '/api/score', '/api/demo', '/api/trials'],
    })
# Registered at the path advertised in home()'s endpoint list; without the
# decorator Flask never exposes this view.
@app.route('/api/health')
def health():
    """Return a liveness payload with the active brain and tier thresholds."""
    return jsonify({
        'status': 'healthy',
        'brain': BRAIN_TYPE,
        'version': '3.0',
        'calibrated': True,
        'thresholds': {'high': HIGH_THRESHOLD, 'med': MED_THRESHOLD},
    })
# Registered at the path advertised in home()'s endpoint list; the body
# handles both GET (query string) and POST (JSON), so both are allowed.
@app.route('/api/score', methods=['GET', 'POST'])
def score():
    """Score a sequence window and return tier, therapy options and trials.

    Input (JSON body on POST, query string on GET):
        window: DNA/RNA sequence, at least 20 characters (required).
        gene:   Gene symbol; defaults to 'UNKNOWN'.
        trials: Whether to query ClinicalTrials.gov; defaults to true.

    Returns:
        A JSON payload with the score, tier, +4 base annotation, the
        DRUG_DATABASE entry for the tier, optional live trials, and an audit
        hash; or ``({'error': ...}, 400)`` when ``window`` is missing, not a
        string, or shorter than 20 characters.
    """
    if request.method == 'POST':
        data = request.get_json() or {}
        if not isinstance(data, dict):
            # A JSON array/scalar body has no fields; treat it as empty so
            # the request is rejected with 400 below instead of a 500.
            data = {}
        window = data.get('window', '')
        gene = data.get('gene', 'UNKNOWN')
        fetch_trials = data.get('trials', True)
    else:
        window = request.args.get('window', '')
        gene = request.args.get('gene', 'UNKNOWN')
        fetch_trials = request.args.get('trials', 'true').lower() == 'true'
    if not isinstance(window, str) or len(window) < 20:
        return jsonify({'error': 'window required (min 20bp DNA sequence)'}), 400
    window = window.upper().replace('U', 'T')
    rp_score, brain_used = predict(window)
    tier = get_tier(rp_score)
    # The padded window is always at least 30 long, so index 18 exists.
    p4 = (window + 'N' * 30)[18]
    road, road_desc = PLUS4_ROAD.get(p4, ('Unknown', 'Unknown'))
    drugs = DRUG_DATABASE[tier]
    # Use one timestamp for both the hash and the response so the audit hash
    # can be recomputed from the returned fields.
    timestamp = datetime.now(timezone.utc).isoformat()
    audit = hashlib.sha256(
        (window + str(rp_score) + timestamp).encode()
    ).hexdigest()[:16]
    # Fetch live clinical trials
    clinical_trials = []
    if fetch_trials:
        clinical_trials = fetch_clinical_trials(
            gene=gene if gene != 'UNKNOWN' else None)
    return jsonify({
        'gene': gene,
        'window': window[:30],
        'rp_score': rp_score,
        'tier': tier,
        'plus4_base': p4,
        'plus4_road': road,
        'plus4_road_desc': road_desc,
        'therapy': drugs['therapy'],
        'mechanism': drugs['mechanism'],
        'approved_drugs': drugs['approved'],
        'phase3_drugs': drugs['phase3'],
        'phase2_drugs': drugs['phase2'],
        'preclinical': drugs['preclinical'],
        'combination': drugs['combination'],
        'clinical_trials': clinical_trials,
        'brain': brain_used,
        'confidence': 'HIGH' if brain_used == 'DNABERT-2' else 'MEDIUM',
        'audit_hash': audit,
        'timestamp': timestamp,
        'inventor': 'Manav Vanga',
        'patent': 'Pending 2026',
    })
# Registered at the path advertised in home()'s endpoint list; without the
# decorator Flask never exposes this view.
@app.route('/api/trials')
def trials():
    """Live clinical trials from ClinicalTrials.gov

    Query parameters: ``gene``, ``condition``, ``drug`` (all optional).
    When ``drug`` is given the drug-specific search is used; otherwise the
    gene/condition search runs. Returns the echoed query, result count and
    the trial list.
    """
    gene = request.args.get('gene', '')
    condition = request.args.get('condition', '')
    drug = request.args.get('drug', '')
    if drug:
        results = fetch_drug_trials(drug)
    else:
        results = fetch_clinical_trials(gene=gene, condition=condition)
    return jsonify({
        'query': {'gene': gene, 'condition': condition, 'drug': drug},
        'count': len(results),
        'trials': results,
        'source': 'ClinicalTrials.gov API v2',
        # NOTE(review): the 'β' below looks like a mis-encoded dash; kept
        # as-is because it is runtime output.
        'note': 'Live data β refreshed on every request',
    })
# Registered at the path advertised in home()'s endpoint list; without the
# decorator Flask never exposes this view.
@app.route('/api/demo')
def demo():
    """Score a fixed set of example variants and return the results.

    Each example is (gene, variant, 30-character window, expected label);
    the window is passed to predict() as written and the resulting score is
    tiered with get_tier().
    """
    demos = [
        ('CFTR', 'Y122X',  'AAGAAATCGATCAGTTAACAGCTTGCAGCN', '18.5% paper'),
        ('CFTR', 'G542X',  'AAGAAATCGATCAGTTGAGAGCTTGCAGCN', '0.3% paper'),
        ('CFTR', 'W1282X', 'AAGAAATCGATCAGTTGACAGCTTGCAGCN', '8.2% paper'),
        ('DMD',  'Q1922X', 'GCAGCAGCAGCAGCATGACGCAGCAGCAGC', 'predicted HIGH'),
        ('TP53', 'R213X',  'CGCGGCGGCGGCGGTGACGCAGCAGCAGCN', 'predicted HIGH'),
    ]
    results = []
    for gene, variant, window, expected in demos:
        s, brain = predict(window)
        results.append({
            'gene': gene,
            'variant': variant,
            'rp_score': s,
            'tier': get_tier(s),
            'expected': expected,
            'brain': brain,
        })
    return jsonify({
        'demo_results': results,
        'brain': BRAIN_TYPE,
        'calibration': {'high': HIGH_THRESHOLD, 'med': MED_THRESHOLD},
    })
if __name__ == '__main__':
    # Listen on all interfaces; PORT from the environment overrides 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))