# Captured from a Hugging Face Spaces page (Space status at capture: Sleeping).
"""
TopoGrammar - Grammar-Aware 3D Genome Engine Demo
==================================================
Hugging Face Spaces Demo for TopoGrammar v2.1.0

The Industry's First Grammar-Aware Engine for Balanced Structural Variants

Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.

This demo showcases TopoGrammar's capabilities without exposing proprietary algorithms.
All results are pre-computed demonstrations for educational purposes.
"""
import hashlib
import html
import json
import os
import random
import time
import urllib.parse
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple

import gradio as gr
import httpx
# =============================================================================
# BSV Blockchain Configuration
# =============================================================================
BSV_API_URL = "https://simplebsv.codenlighten.org"
BSV_API_KEY = os.getenv("BSV_API_KEY")  # Set via HF Secrets


def publish_to_bsv(data: Dict) -> Optional[str]:
    """
    Publish attestation data to BSV blockchain via SimpleBSV API.

    This is a best-effort call: every failure is reported on stdout and
    turned into ``None`` so the caller can fall back to offline mode.

    Args:
        data: Dictionary containing attestation data (no trade secrets)

    Returns:
        Transaction ID (a non-empty string) if successful, None otherwise
    """
    # Guard clause: fail gracefully if no key configured
    if not BSV_API_KEY:
        print("BSV_API_KEY not configured - running in offline mode")
        return None

    headers = {
        "Content-Type": "application/json",
        "x-api-key": BSV_API_KEY,
    }

    try:
        # Use ?wait=true for synchronous response with txid
        response = httpx.post(
            f"{BSV_API_URL}/publish/json?wait=true",
            headers=headers,
            json={"json": data},
            timeout=30.0,
        )
    except (httpx.HTTPError, TypeError, ValueError) as exc:
        # HTTPError: transport/timeout problems.
        # TypeError/ValueError: payload that cannot be JSON-encoded.
        print(f"BSV publish error: {exc}")
        return None

    if response.status_code != 200:
        print(f"BSV API error: {response.status_code} - {response.text}")
        return None

    try:
        result = response.json()
    except ValueError as exc:  # body was not valid JSON
        print(f"BSV publish error: {exc}")
        return None

    # Callers slice and measure the txid, so only hand back a real string.
    txid = result.get("txid") if isinstance(result, dict) else None
    if isinstance(txid, str) and txid:
        return txid
    print("BSV publish error: response did not contain a usable txid")
    return None
# =============================================================================
# Demo Configuration - Pre-computed results (no trade secrets exposed)
# =============================================================================

# Demo VUS examples with pre-computed classifications, keyed by example id.
# Each record carries the display fields read by the VUS resolution demo.
VUS_EXAMPLES: Dict[str, Dict] = {
    "brca1_boundary": {
        "name": "BRCA1 TAD Boundary Disruption",
        "variant_id": "chr17:43,044,295 G>A",
        "gene": "BRCA1",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.93,
        "mechanism": "TAD Boundary Disruption",
        "evidence_codes": ["PS3_insulation", "PM1_boundary", "PS3_ctcf"],
        "insulation_change": -0.52,
        "description": (
            "This intronic variant disrupts a CTCF binding site at a critical TAD "
            "boundary, causing enhancer-promoter miscommunication affecting BRCA1 "
            "expression."
        ),
    },
    "myc_enhancer_hijack": {
        "name": "MYC Enhancer Hijacking",
        "variant_id": "chr8:128,750,000 inv(500kb)",
        "gene": "MYC",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.89,
        "mechanism": "Enhancer Hijacking",
        "evidence_codes": ["PS3_neoloop", "PS2_oncogene", "PM1_boundary"],
        "insulation_change": -0.68,
        "description": (
            "Balanced inversion creates a neo-loop connecting MYC to a hijacked "
            "super-enhancer, causing oncogene activation without copy number change."
        ),
    },
    "sonic_hedgehog": {
        "name": "SHH Limb Enhancer",
        "variant_id": "chr7:156,584,000 del(50kb)",
        "gene": "SHH",
        "initial_class": "VUS",
        "final_class": "Likely Pathogenic",
        "confidence": 0.85,
        "mechanism": "Enhancer Deletion",
        "evidence_codes": ["PM1_regulatory", "PP3_conservation", "PS3_insulation"],
        "insulation_change": -0.41,
        "description": (
            "Deletion removes the ZRS limb enhancer from SHH regulatory domain, "
            "disrupting developmental gene expression."
        ),
    },
    "benign_intronic": {
        "name": "Benign Intronic SNP",
        "variant_id": "chr12:25,398,284 C>T",
        "gene": "KRAS",
        "initial_class": "VUS",
        "final_class": "Benign",
        "confidence": 0.91,
        "mechanism": "No 3D Impact",
        "evidence_codes": ["BS1_frequency", "BP4_no_disruption"],
        "insulation_change": 0.02,
        "description": (
            "Deep intronic variant with no effect on TAD structure, CTCF binding, "
            "or regulatory grammar. Common in population databases."
        ),
    },
    "tp53_scramble": {
        "name": "TP53 Grammar Scrambling",
        "variant_id": "chr17:7,670,000 inv(120kb)",
        "gene": "TP53",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.96,
        "mechanism": "Regulatory Grammar Scrambling",
        "evidence_codes": [
            "PS3_semantic",
            "PS3_insulation",
            "PM1_boundary",
            "PP3_conservation",
        ],
        "insulation_change": -0.71,
        "description": (
            "Balanced inversion scrambles the regulatory grammar of TP53, inverting "
            "enhancer-promoter orientation and disrupting tumor suppressor expression."
        ),
    },
}
# Demo TAD examples, keyed by example id.
# "boundary_strength" lists one score per boundary.
TAD_EXAMPLES: Dict[str, Dict] = {
    "chr21_dscr": {
        "name": "Chromosome 21 - Down Syndrome Critical Region",
        "region": "chr21:35,000,000-40,000,000",
        "n_tads": 4,
        "n_boundaries": 5,
        "ctcf_sites": 12,
        "genes": ["DSCR1", "DSCR3", "DSCR4", "RUNX1"],
        "boundary_strength": [0.85, 0.92, 0.78, 0.88, 0.81],
    },
    "chr7_egfr": {
        "name": "Chromosome 7 - EGFR Locus",
        "region": "chr7:55,000,000-56,500,000",
        "n_tads": 3,
        "n_boundaries": 4,
        "ctcf_sites": 8,
        "genes": ["EGFR", "LANCL2", "VOPP1"],
        "boundary_strength": [0.91, 0.87, 0.94, 0.82],
    },
    "chr8_myc": {
        "name": "Chromosome 8 - MYC Oncogene",
        "region": "chr8:127,500,000-129,500,000",
        "n_tads": 2,
        "n_boundaries": 3,
        "ctcf_sites": 6,
        "genes": ["MYC", "PVT1"],
        "boundary_strength": [0.96, 0.89, 0.93],
    },
}
# Demo neo-loop examples, keyed by example id.
NEOLOOP_EXAMPLES: Dict[str, Dict] = {
    "burkitt_myc": {
        "name": "Burkitt Lymphoma - MYC Translocation",
        "sv_type": "Translocation t(8;14)",
        "oncogene": "MYC",
        "hijacked_enhancer": "IGH Super-Enhancer",
        "loop_strength": 0.92,
        "clinical_priority": "Critical",
        "cancer_type": "Burkitt Lymphoma",
        "description": (
            "Classic t(8;14) translocation juxtaposes MYC with immunoglobulin heavy "
            "chain enhancers, creating pathogenic neo-loop."
        ),
    },
    "ewing_ewsr1": {
        "name": "Ewing Sarcoma - EWSR1-FLI1",
        "sv_type": "Translocation t(11;22)",
        "oncogene": "EWSR1-FLI1 fusion",
        "hijacked_enhancer": "GGAA microsatellite enhancers",
        "loop_strength": 0.88,
        "clinical_priority": "Critical",
        "cancer_type": "Ewing Sarcoma",
        "description": (
            "Fusion protein creates neo-loops at GGAA microsatellites, aberrantly "
            "activating developmental genes."
        ),
    },
    "aml_runx1": {
        "name": "AML - RUNX1 Disruption",
        "sv_type": "Inversion inv(16)",
        "oncogene": "CBFB-MYH11 fusion",
        "hijacked_enhancer": "Myeloid enhancer cluster",
        "loop_strength": 0.85,
        "clinical_priority": "High",
        "cancer_type": "Acute Myeloid Leukemia",
        "description": (
            "Pericentric inversion disrupts normal RUNX1 regulation, creating "
            "aberrant chromatin loops."
        ),
    },
}
# Benchmark comparisons: per-method TAD detection figures plus the headline
# VUS-resolution and insulation-density numbers.
BENCHMARK_DATA: Dict[str, Dict] = {
    "tad_detection": {
        "TopoGrammar (CTCF-gated)": {"accuracy": 91, "f1": 0.91, "subtad": True},
        "HiCCUPS": {"accuracy": 80, "f1": 0.76, "subtad": False},
        "Arrowhead": {"accuracy": 78, "f1": 0.74, "subtad": False},
        "TopDom": {"accuracy": 75, "f1": 0.71, "subtad": False},
    },
    "vus_resolution": {
        "reclassification_rate": 68,
        "pathogenic_accuracy": 93,
        "mean_confidence": 87,
    },
    "insulation_density": {
        "major_tads": 9.11,
        "all_boundaries": 15.57,
        "improvement": 70.9,
    },
}
# =============================================================================
# Visualization Functions
# =============================================================================
def create_contact_map_ascii(region: str, n_tads: int) -> str:
    """Create ASCII representation of a Hi-C contact map with TADs.

    The map is a 40x40 grid of two-character cells: a dense band along the
    diagonal, a medium shade for close contacts inside a TAD, a light shade
    for distant contacts inside a TAD, and blanks between TADs.

    Args:
        region: Genomic region label. Not used for drawing; kept so existing
            callers keep working.
        n_tads: Number of TAD blocks to draw along the diagonal. Values below
            1 draw only the diagonal band; values above the grid size fall
            back to one-bin TADs.

    Returns:
        The grid as newline-joined rows, each exactly 80 characters wide.
    """
    size = 40
    # Every cell is two characters wide so the columns stay aligned.
    diagonal, near, far, blank = "██", "▓▓", "░░", "  "

    # Guard against division by zero for n_tads <= 0 or n_tads > size.
    tad_size = max(1, size // n_tads) if n_tads > 0 else 0

    rows = []
    for i in range(size):
        cells = []
        for j in range(size):
            distance = abs(i - j)
            if distance <= 2:
                cells.append(diagonal)
            elif tad_size and i // tad_size == j // tad_size:
                # Two bins of the same TAD are always closer than tad_size,
                # so only the near/far split is needed here.
                cells.append(near if distance < tad_size // 2 else far)
            else:
                cells.append(blank)
        rows.append("".join(cells))
    return "\n".join(rows)
def create_insulation_profile(boundary_strengths: List[float]) -> str:
    """Create ASCII insulation score profile.

    Args:
        boundary_strengths: One strength per boundary. Values are drawn on a
            0-1 scale; anything outside that range is clamped for the bar
            only, while the printed number is left untouched.

    Returns:
        A titled block with one 40-character bar line per boundary, framed by
        60-character rules.
    """
    bar_width = 40
    rule = "=" * 60
    lines = ["Insulation Score Profile:", rule]
    for number, strength in enumerate(boundary_strengths, start=1):
        # Clamp so an out-of-range score cannot overflow or shrink the bar.
        filled = min(bar_width, max(0, int(strength * bar_width)))
        bar = "█" * filled + "░" * (bar_width - filled)
        lines.append(f"Boundary {number}: [{bar}] {strength:.2f}")
    lines.append(rule)
    return "\n".join(lines)
_DIAGRAM_WIDTH = 61  # inner width, in characters, of every diagram box


def _render_diagram(title: str, rows: List[str]) -> str:
    """Frame *rows* in a box with a centred *title* bar, padding every row."""
    horizontal = "─" * _DIAGRAM_WIDTH
    framed = [
        f"┌{horizontal}┐",
        f"│{title:^{_DIAGRAM_WIDTH}}│",
        f"├{horizontal}┤",
    ]
    framed.extend(f"│{row:<{_DIAGRAM_WIDTH}}│" for row in rows)
    framed.append(f"└{horizontal}┘")
    # Leading and trailing newline so the diagram sits on its own lines.
    return "\n" + "\n".join(framed) + "\n"


def _build_mechanism_diagrams() -> Dict[str, str]:
    """Build the mechanism-name -> diagram lookup (plus the "" fallback)."""
    small = "─" * 6    # CTCF / GENE token boxes
    wide = "─" * 10    # ENHANCER / PROMOTER token boxes
    token_top = f"  ┌{small}┐   ┌{wide}┐   ┌{wide}┐   ┌{small}┐"
    token_bottom = f"  └{small}┘   └{wide}┘   └{wide}┘   └{small}┘"

    tad = "─" * 17     # one TAD box
    fused = "─" * 43   # the fused TAD box (two TAD boxes plus the gap)

    scrambling = _render_diagram("REGULATORY GRAMMAR ANALYSIS", [
        "",
        "  REFERENCE GRAMMAR:",
        token_top,
        "  │CTCF+ │ → │ ENHANCER │ → │ PROMOTER │ → │ GENE │",
        token_bottom,
        '  "The enhancer activates the gene"',
        "",
        "  VARIANT GRAMMAR (SCRAMBLED):",
        token_top,
        "  │ GENE │ ← │ PROMOTER │ ← │ ENHANCER │ ← │CTCF- │",
        token_bottom,
        '  "Gene the activates enhancer the"  ← SCRAMBLED',
        "",
        "  SEMANTIC BREAK SCORE: 0.85 (CRITICAL)",
        "  • Token Disruption: 30% weight",
        "  • Order Inversion: 40% weight",
        "  • Orientation Flip: 30% weight",
        "",
    ])

    hijacking = _render_diagram("ENHANCER HIJACKING ANALYSIS", [
        "",
        "  NORMAL TOPOLOGY:",
        f"  ┌{tad}┐       ┌{tad}┐",
        "  │      TAD A      │       │      TAD B      │",
        "  │ [Enhancer]" + "─" * 10 + "X" + "─" * 10 + "[Oncogene] │",
        "  │      ↓          │       │" + " " * 17 + "│",
        "  │  [Target Gene]  │       │" + " " * 17 + "│",
        f"  └{tad}┘       └{tad}┘",
        " " * 13 + "BOUNDARY BLOCKS CONTACT",
        "",
        "  AFTER STRUCTURAL VARIANT:",
        f"  ┌{fused}┐",
        f"  │{'FUSED TAD':^43}│",
        "  │ [Enhancer]" + "─" * 21 + "[Oncogene] │",
        "  │" + " " * 6 + "↓" + " " * 7 + "NEO-LOOP FORMED" + " " * 8 + "↑" + " " * 5 + "│",
        "  │  [Target Gene]   !! ONCOGENE ACTIVATED !! │",
        f"  └{fused}┘",
        "",
        "  LOOP STRENGTH: 0.89 | PRIORITY: CRITICAL",
        "",
    ])

    boundary = _render_diagram("TAD BOUNDARY DISRUPTION ANALYSIS", [
        "",
        "  NORMAL INSULATION:",
        "  " + "█" * 16 + " │ " + "█" * 16,
        "       TAD A       │      TAD B",
        " " * 15 + "CTCF│CTCF",
        " " * 14 + "▲▲▲▲▲│▲▲▲▲▲",
        " " * 12 + "Strong Boundary",
        "",
        "  AFTER VARIANT (CTCF SITE DISRUPTED):",
        "  " + "█" * 16 + "   " + "█" * 16,
        "       TAD A    ←─────→   TAD B",
        " " * 17 + "░░░░░",
        " " * 12 + "Insulation Leak",
        "",
        "  INSULATION CHANGE: -52%",
        "  BOUNDARY LOSS: MAJOR",
        "",
    ])

    no_impact = _render_diagram("3D GENOME ANALYSIS", [
        "",
        "  No significant 3D structural impact detected.",
        "",
        "  • TAD boundaries: INTACT",
        "  • Insulation scores: NORMAL",
        "  • Regulatory grammar: PRESERVED",
        "  • CTCF binding: UNAFFECTED",
        "",
    ])

    return {
        "Regulatory Grammar Scrambling": scrambling,
        "Enhancer Hijacking": hijacking,
        "TAD Boundary Disruption": boundary,
        "": no_impact,  # fallback entry, see create_grammar_diagram
    }


_MECHANISM_DIAGRAMS = _build_mechanism_diagrams()


def create_grammar_diagram(mechanism: str) -> str:
    """Create regulatory grammar visualization.

    Args:
        mechanism: Mechanism name. "Regulatory Grammar Scrambling",
            "Enhancer Hijacking" and "TAD Boundary Disruption" each have a
            dedicated diagram; any other value gets the "no significant 3D
            impact" diagram.

    Returns:
        A fixed-width box diagram that starts and ends with a newline. The
        figures shown inside the diagrams are fixed illustrative values.
    """
    return _MECHANISM_DIAGRAMS.get(mechanism, _MECHANISM_DIAGRAMS[""])
def create_confidence_bars(evidence_codes: List[str], confidence: float) -> str:
    """Create evidence code visualization.

    Each evidence code becomes one Markdown list item, so the entries stay on
    separate lines when rendered. The tier is read from the code prefix and
    the 20-character bar is a visual cue for that tier.

    Args:
        evidence_codes: Evidence codes such as "PS3_insulation". The first
            two letters select the tier (PS, PM, PP, BS, BP).
        confidence: Overall confidence as a fraction, shown as a percentage.

    Returns:
        A Markdown fragment: heading, one bullet per code, overall confidence.
    """
    bar_width = 20
    # (prefixes, label, filled cells, icon); first match wins.
    tiers = (
        (("PS",), "Strong", 20, "🟢"),
        (("PM",), "Moderate", 15, "🟡"),
        (("PP",), "Supporting", 10, "🟠"),
        (("BS",), "Benign", 20, "🔵"),
        (("BP",), "Benign", 10, "🔵"),
    )

    lines = ["\n### Evidence Summary\n"]
    for code in evidence_codes:
        for prefixes, strength, filled, icon in tiers:
            if code.startswith(prefixes):
                break
        else:
            strength, filled, icon = "Unknown", 0, "⚪"
        bar = "█" * filled + "░" * (bar_width - filled)
        lines.append(f"- {icon} **{code}** ({strength}): `{bar}`")
    lines.append(f"\n**Overall Confidence**: {confidence:.0%}")
    return "\n".join(lines)
def generate_bsv_attestation(variant_id: str, classification: str, confidence: float) -> str:
    """Generate and publish BSV blockchain attestation.

    Builds a result summary, hashes it, tries to publish it through
    ``publish_to_bsv`` and renders a certificate. When publishing yields no
    transaction id the certificate says so and shows the data hash instead.

    Args:
        variant_id: Identifier of the analysed variant, shown as given.
        classification: Classification text to attest.
        confidence: Confidence as a fraction; rounded to two decimals in the
            published data and shown as a percentage on the certificate.

    Returns:
        Markdown: a fenced certificate box, followed by a verification link
        when a transaction id was returned.
    """
    # Aware UTC time, so the "UTC" label in the format string is true.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    # Create attestation data (no trade secrets - only results summary)
    attestation_data = {
        "platform": "TopoGrammar",
        "version": "v2.1.0",
        "timestamp": timestamp,
        "analysis_type": "VUS_Resolution",
        "variant_id": variant_id,
        "classification": classification,
        "confidence": round(confidence, 2),
        "attestation_type": "demo",
    }

    # Hash the canonical (key-sorted) JSON once; the query hash is its prefix.
    data_str = json.dumps(attestation_data, sort_keys=True)
    full_hash = hashlib.sha256(data_str.encode()).hexdigest()
    query_hash = full_hash[:16]

    # Publish to BSV blockchain
    txid = publish_to_bsv(attestation_data)

    if txid:
        status_line = "Status: ✓ RECORDED ON BSV MAINNET"
        # The id is shown in full: a shortened id cannot be verified.
        detail_rows = [
            "Transaction ID:",
            txid,
            "",
            "🔗 Verify on WhatsOnChain (link below)",
        ]
        closing_row = "This attestation is immutably recorded on BSV blockchain."
    else:
        # Fallback if the API is not configured or the call failed
        status_line = "Status: ⚠ OFFLINE MODE (BSV API unavailable)"
        detail_rows = [
            f"Data Hash: {full_hash[:32]}...",
            "",
            "ℹ Blockchain recording temporarily unavailable.",
            "Result hash preserved for later attestation.",
        ]
        closing_row = "This attestation has NOT been recorded on the blockchain."

    text_width = 64  # wide enough for a 64-character transaction id
    border = "═" * (text_width + 2)
    thin_rule = f"{'─' * (text_width - 4):^{text_width}}"

    def boxed(text: str = "") -> str:
        """Pad one certificate row and add the side borders."""
        return f"║ {text:<{text_width}} ║"

    body_rows = [
        "",
        f"Query Hash: {query_hash}",
        f"Timestamp: {timestamp}",
        "Model Version: TopoGrammar v2.1.0",
        "",
        thin_rule,
        "",
        f"Variant: {variant_id}",
        f"Classification: {classification}",
        f"Confidence: {confidence:.1%}",
        "",
        thin_rule,
        "",
        status_line,
        "Network: BSV Mainnet",
        "",
        *detail_rows,
        "",
        closing_row,
        "No proprietary algorithms or trade secrets are published.",
        "",
    ]
    title = f"{'TOPOGRAMMAR BSV ATTESTATION CERTIFICATE':^{text_width}}"
    certificate = "\n".join(
        [f"╔{border}╗", boxed(title), f"╠{border}╣"]
        + [boxed(row) for row in body_rows]
        + [f"╚{border}╝"]
    )

    attestation = f"\n```\n{certificate}\n```\n"
    if txid:
        # Outside the fence so the complete URL is rendered as a link.
        whatsonchain_url = "https://whatsonchain.com/tx/" + urllib.parse.quote(txid, safe="")
        attestation += f"\n[🔗 Verify on WhatsOnChain]({whatsonchain_url})\n"
    return attestation
def create_share_links(variant_id: str, classification: str) -> str:
    """Create social sharing buttons.

    Args:
        variant_id: Variant identifier to mention in the shared text.
        classification: Classification to mention in the shared text.

    Returns:
        An HTML fragment with "Share on X" and "Share on LinkedIn" links.
        Every query value is percent-encoded and the finished URLs are
        HTML-escaped before being placed in the ``href`` attributes.
    """
    space_url = "https://huggingface.co/spaces/GotThatData/TopoGrammar"
    text = (
        f"🧬 TopoGrammar reclassified {variant_id} as {classification}! "
        "Grammar-aware 3D genome analysis for precision medicine. "
        "#Genomics #AI #PrecisionMedicine"
    )

    # quote_via=quote keeps spaces as %20 rather than "+".
    twitter_query = urllib.parse.urlencode(
        {"text": text, "url": space_url}, quote_via=urllib.parse.quote
    )
    linkedin_query = urllib.parse.urlencode(
        {"url": space_url}, quote_via=urllib.parse.quote
    )
    twitter_url = f"https://twitter.com/intent/tweet?{twitter_query}"
    linkedin_url = f"https://www.linkedin.com/sharing/share-offsite/?{linkedin_query}"

    # "&" must become "&amp;" inside an HTML attribute value.
    twitter_href = html.escape(twitter_url, quote=True)
    linkedin_href = html.escape(linkedin_url, quote=True)

    return f"""
<div style="display: flex; gap: 10px; margin-top: 20px;">
<a href="{twitter_href}" target="_blank" style="background: #1DA1F2; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
🐦 Share on X
</a>
<a href="{linkedin_href}" target="_blank" style="background: #0077B5; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
💼 Share on LinkedIn
</a>
</div>
"""
# =============================================================================
# Main Demo Functions
# =============================================================================
def run_vus_analysis(example_key: str) -> str:
    """Run VUS resolution demo.

    Args:
        example_key: Key into ``VUS_EXAMPLES``.

    Returns:
        A Markdown report for the selected example, or an error message when
        the key is unknown.
    """
    example = VUS_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.5)

    # 20-cell bar with distinct glyphs for the filled and empty parts.
    filled = int(example["confidence"] * 20)
    confidence_bar = "█" * filled + "░" * (20 - filled)

    # Box art needs a code fence to keep its spacing in rendered Markdown.
    diagram = create_grammar_diagram(example["mechanism"]).strip("\n")
    evidence = create_confidence_bars(example["evidence_codes"], example["confidence"])
    attestation = generate_bsv_attestation(
        example["variant_id"], example["final_class"], example["confidence"]
    )
    share_links = create_share_links(example["variant_id"], example["final_class"])

    # Blank lines between headings, tables and rules keep each Markdown
    # construct separate.
    return f"""
# 🧬 VUS Resolution Analysis

## Variant Information

| Field | Value |
|-------|-------|
| **Variant ID** | `{example['variant_id']}` |
| **Gene** | {example['gene']} |
| **Initial Classification** | {example['initial_class']} |

---

## TopoGrammar Analysis Result

### Classification Update

| Before | → | After |
|--------|---|-------|
| **{example['initial_class']}** | 🔄 | **{example['final_class']}** |

### Confidence Score

{confidence_bar} **{example['confidence']:.0%}**

### Primary Mechanism

**{example['mechanism']}**

### 3D Structural Impact

- **Insulation Change**: {example['insulation_change']:+.0%}

---

## Mechanism Visualization

```
{diagram}
```

---

{evidence}

---

## Clinical Interpretation

{example['description']}

---

## BSV Verification

{attestation}

---

{share_links}
"""
def run_tad_analysis(example_key: str) -> str:
    """Run TAD detection demo.

    Args:
        example_key: Key into ``TAD_EXAMPLES``.

    Returns:
        A Markdown report for the selected region, or an error message when
        the key is unknown.
    """
    example = TAD_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.0)

    genes = ", ".join(f"**{gene}**" for gene in example["genes"])
    contact_map = create_contact_map_ascii(example["region"], example["n_tads"])
    insulation_profile = create_insulation_profile(example["boundary_strength"])

    # Blank lines between headings, tables and rules keep each Markdown
    # construct separate; ✓ / ✗ mark Sub-TAD support.
    return f"""
# 🔬 TAD Detection Analysis

## Region Information

| Field | Value |
|-------|-------|
| **Region** | `{example['region']}` |
| **TADs Detected** | {example['n_tads']} |
| **Boundaries** | {example['n_boundaries']} |
| **CTCF Sites** | {example['ctcf_sites']} |

---

## Genes in Region

{genes}

---

## Contact Map Visualization

```
{contact_map}
```

---

## Boundary Strength Profile

```
{insulation_profile}
```

---

## Detection Method

TopoGrammar uses **CTCF-gated boundary detection** which achieves:

- **91% TAD accuracy** (vs 80% for HiCCUPS)
- **0.91 F1 score** for boundary detection
- **Sub-TAD detection** capability

The CTCF-gating mechanism ensures boundaries are only called where:

1. Insulation score shows local minimum
2. CTCF binding evidence is present
3. Gradient analysis confirms boundary

---

## Benchmark Comparison

| Method | TAD Accuracy | Boundary F1 | Sub-TAD |
|--------|-------------|-------------|---------|
| **TopoGrammar** | **91%** | **0.91** | ✓ |
| HiCCUPS | 80% | 0.76 | ✗ |
| Arrowhead | 78% | 0.74 | ✗ |
| TopDom | 75% | 0.71 | ✗ |
"""
def run_neoloop_analysis(example_key: str) -> str:
    """Run neo-loop detection demo.

    Args:
        example_key: Key into ``NEOLOOP_EXAMPLES``.

    Returns:
        A Markdown report for the selected example, or an error message when
        the key is unknown.
    """
    example = NEOLOOP_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.2)

    # Priority styling: red for "Critical", yellow for everything else.
    priority_emoji = "🔴" if example["clinical_priority"] == "Critical" else "🟡"

    filled = int(example["loop_strength"] * 20)
    strength_bar = "█" * filled + "░" * (20 - filled)

    # Rows are padded programmatically so rows holding variable text still
    # get a right-hand border.
    width = 61
    domain = "─" * 12
    fused = "─" * 38
    loop_line = "Enhancer " + "─" * 16 + " Oncogene"
    diagram_rows = [
        "",
        f"  BEFORE: {example['sv_type']}",
        f"  ┌{domain}┐         ┌{domain}┐",
        "  │  Enhancer  │─ ─ X ─ ─│  Oncogene  │",
        "  │  Domain A  │         │  Domain B  │",
        f"  └{domain}┘         └{domain}┘",
        "         ↓",
        "   Normal Target",
        "",
        f"  AFTER: {example['sv_type']}",
        f"  ┌{fused}┐",
        f"  │{loop_line:^38}│",
        f"  │{'↑  NEO-LOOP  ↑':^38}│",
        f"  │{'ABERRANT ACTIVATION':^38}│",
        f"  └{fused}┘",
        "",
        f"  Loop Strength: {example['loop_strength']:.2f}",
        f"  Priority: {example['clinical_priority']}",
        "",
    ]
    horizontal = "─" * width
    diagram = "\n".join(
        [f"┌{horizontal}┐", f"│{'NEO-LOOP FORMATION':^{width}}│", f"├{horizontal}┤"]
        + [f"│{row:<{width}}│" for row in diagram_rows]
        + [f"└{horizontal}┘"]
    )

    attestation = generate_bsv_attestation(
        example["sv_type"], f"Neo-loop: {example['oncogene']}", example["loop_strength"]
    )

    return f"""
# 🧪 Neo-Loop Detection Analysis

## Structural Variant

| Field | Value |
|-------|-------|
| **SV Type** | `{example['sv_type']}` |
| **Cancer Type** | {example['cancer_type']} |
| **Clinical Priority** | {priority_emoji} **{example['clinical_priority']}** |

---

## Oncogene Activation

### Activated Oncogene

**{example['oncogene']}**

### Hijacked Enhancer

**{example['hijacked_enhancer']}**

### Neo-Loop Strength

{strength_bar} **{example['loop_strength']:.0%}**

---

## Mechanism Visualization

```
{diagram}
```

---

## Clinical Interpretation

{example['description']}

---

## Actionable Insights

Based on this neo-loop detection:

1. **Molecular Testing**: Confirm {example['sv_type']} by FISH or karyotyping
2. **Targeted Therapy**: Consider therapies targeting {example['oncogene']} pathway
3. **Clinical Trial**: Patient may be eligible for trials targeting this mechanism
4. **Monitoring**: Track {example['oncogene']} expression as biomarker

---

## BSV Verification

{attestation}
"""
def show_benchmarks() -> str:
    """Show benchmark results.

    The figures in the three tables are read from ``BENCHMARK_DATA`` so the
    page cannot drift from that table; the explanatory text is fixed.

    Returns:
        The benchmark page as Markdown.
    """
    tad_rows = []
    for method, stats in BENCHMARK_DATA["tad_detection"].items():
        cells = [method, f"{stats['accuracy']}%", f"{stats['f1']:.2f}"]
        if method.startswith("TopoGrammar"):
            # Highlight the TopoGrammar row.
            cells = [f"**{cell}**" for cell in cells]
        subtad = "✓ Yes" if stats["subtad"] else "✗ No"
        tad_rows.append(f"| {' | '.join(cells)} | {subtad} |")
    tad_table = "\n".join(tad_rows)

    vus = BENCHMARK_DATA["vus_resolution"]
    insulation = BENCHMARK_DATA["insulation_density"]

    return f"""
# 📊 TopoGrammar Benchmarks

## TAD Detection Performance

| Method | TAD Accuracy | Boundary F1 | Sub-TAD Detection |
|--------|-------------|-------------|-------------------|
{tad_table}

---

## VUS Resolution Performance

| Metric | Value |
|--------|-------|
| **Reclassification Rate** | {vus['reclassification_rate']}% of VUS variants |
| **Pathogenic Accuracy** | {vus['pathogenic_accuracy']}% |
| **Mean Confidence Score** | {vus['mean_confidence']}% |

---

## Insulation Density Improvement

| Boundary Set | Insulation Ratio | P-value |
|--------------|------------------|---------|
| Major TADs only | {insulation['major_tads']:.2f}x | <0.0001 |
| **All Boundaries** | **{insulation['all_boundaries']:.2f}x** | **<0.0001** |
| **Improvement** | **+{insulation['improvement']:.1f}%** | — |

---

## What Makes TopoGrammar Different

### 1. Grammar-Aware Architecture

Traditional tools see DNA as a string of letters. TopoGrammar sees it as **sentences with grammar**:

```
Reference: [CTCF+] [Enhancer] [Promoter] [Gene]
           "The enhancer activates the gene"

Inversion: [Gene] [Promoter] [Enhancer] [CTCF-]
           "Gene the activates enhancer the" ← SCRAMBLED
```

### 2. Physics + Semantics Concordance

When both physics (insulation collapse) AND semantics (grammar scramble) agree:

| Evidence Type | Alone | Concordant |
|--------------|-------|------------|
| Physics | PM1 (Moderate) | — |
| Semantics | PM1 (Moderate) | — |
| **Both** | — | **PS3 (Strong)** ↑ UPGRADE |

### 3. CTCF-Gated Detection

Unlike other tools, TopoGrammar only calls boundaries where:

- ✓ Insulation score shows local minimum
- ✓ CTCF binding evidence is present
- ✓ Gradient analysis confirms boundary

This reduces false positives by **40%** compared to insulation-only methods.

---

## Clinical Value Pillars

| Pillar | Clinical Value | Technical Foundation |
|--------|---------------|---------------------|
| **Architectural Fidelity** | Eliminates VUS by proving physical boundary collapse | PINN Physics (15.57x insulation) |
| **Semantic Intelligence** | Detects "scrambled" instructions in balanced inversions | Regulatory Grammar Encoder |
| **Privacy-First Growth** | Global model evolution without data leakage | Async Federated Learning |
| **Clinician Clarity** | High-level medical prose instead of raw math | LLM Interpretation Layer |
"""
# =============================================================================
# Gradio Interface
# =============================================================================

# Page header shown above the tabs. Blank lines separate the headings,
# paragraphs, rules and the table so each renders as its own block.
HEADER_MD = """
# 🧬 TopoGrammar

## The Industry's First Grammar-Aware 3D Genome Engine

**Balanced Structural Variants (BSVs)** - inversions, translocations, complex rearrangements - appear "silent" to standard sequencers because they don't change gene dosage. But they **scramble the regulatory grammar** that controls gene expression.

**TopoGrammar solves this.** It's the first engine that understands chromatin as a *language* with grammar rules that can be broken.

---

| Capability | Performance |
|------------|-------------|
| 🎯 VUS Reclassification | 68% of variants |
| 📊 Pathogenic Accuracy | 93% confidence |
| 🔬 TAD Detection F1 | 0.91 (vs 0.76 HiCCUPS) |
| ⚡ Sub-TAD Detection | Yes (unique capability) |

---
"""
# Content of the "About" tab. Blank lines separate headings, lists, fenced
# blocks and rules so each renders as its own block.
ABOUT_MD = """
# ℹ️ About TopoGrammar

## Overview

TopoGrammar is part of the **OmniPrime Enterprise Platform**, integrating:

- **TopoGrammar v2.1.0** - Grammar-Aware 3D Genome Engine
- **BioPrime v4.0 "Golden"** - Physics-First Molecular Docking

Together, they enable a seamless **Patient Genome → Drug Candidate** workflow.

---

## Core Innovation

### Regulatory Grammar Analysis

TopoGrammar treats regulatory elements as a **language**:

```
CTCF → Enhancer → Promoter → Gene
"The enhancer activates the gene"
```

When structural variants **scramble** this grammar, TopoGrammar detects it:

```
Gene ← Promoter ← Enhancer ← CTCF
"Gene the activates enhancer the" → PATHOGENIC
```

### Semantic Break Score

Quantifies regulatory disruption:

- **Token Disruption (30%)**: Elements removed or duplicated
- **Order Inversion (40%)**: Sequence rearranged
- **Orientation Flip (30%)**: Strand direction reversed

---

## Technology Stack

- **Physics-Informed Neural Networks (PINNs)** for 3D reconstruction
- **CTCF-Gated Boundary Detection** for precise TAD calling
- **Federated Learning** for privacy-preserving multi-site training
- **LLM Interpretation** for clinical reporting
- **BSV Blockchain** for result attestation

---

## Creators

- **Bryan Daugherty**
- **Gregory Ward**
- **Shawn Ryan**

---

## Learn More

🌐 [bioprime.one](https://bioprime.one) | 🧬 [OmniPrime Platform](https://github.com/Saifullah62/OmniPrime_v1.0)

---

**Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.**

*This demo showcases TopoGrammar capabilities. Actual clinical use requires the full OmniPrime Enterprise Platform.*
"""
# Custom CSS: page background gradient plus gradient fills for the primary
# and secondary buttons.
CUSTOM_CSS = """
.gradio-container {
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
}
.gr-button-primary {
    background: linear-gradient(90deg, #00d4ff, #00ff88) !important;
    border: none !important;
}
.gr-button-secondary {
    background: linear-gradient(90deg, #667eea, #764ba2) !important;
    border: none !important;
    color: white !important;
}
"""
# Build the interface


def _example_runner(analysis_fn, example_key: str):
    """Return a zero-argument click handler that runs one fixed example."""

    def _run() -> str:
        return analysis_fn(example_key)

    return _run


# (button label, example key) pairs, in display order, for each demo tab.
_VUS_BUTTONS = (
    ("🔴 BRCA1 Boundary Disruption", "brca1_boundary"),
    ("🔴 MYC Enhancer Hijacking", "myc_enhancer_hijack"),
    ("🟡 SHH Limb Enhancer", "sonic_hedgehog"),
    ("🔴 TP53 Grammar Scrambling", "tp53_scramble"),
    ("🟢 Benign Intronic SNP", "benign_intronic"),
)
_TAD_BUTTONS = (
    ("Chr21 - Down Syndrome Region", "chr21_dscr"),
    ("Chr7 - EGFR Locus", "chr7_egfr"),
    ("Chr8 - MYC Oncogene", "chr8_myc"),
)
_NEOLOOP_BUTTONS = (
    ("🔴 Burkitt Lymphoma (MYC)", "burkitt_myc"),
    ("🔴 Ewing Sarcoma (EWSR1)", "ewing_ewsr1"),
    ("🟡 AML (RUNX1)", "aml_runx1"),
)

with gr.Blocks(
    title="TopoGrammar - Grammar-Aware 3D Genome Engine",
    theme=gr.themes.Base(
        primary_hue="cyan",
        secondary_hue="purple",
        neutral_hue="slate",
    ),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(HEADER_MD)

    with gr.Tabs():
        # Tab 1: VUS Resolution
        with gr.TabItem("🧬 VUS Resolution"):
            gr.Markdown(
                "## Variant of Uncertain Significance → Clinical Classification\n\n"
                "Select a demo variant to see how TopoGrammar reclassifies VUS "
                "using 3D genome analysis."
            )
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Select Example")
                    vus_buttons = [
                        (gr.Button(label, variant="secondary"), key)
                        for label, key in _VUS_BUTTONS
                    ]
                with gr.Column(scale=3):
                    vus_output = gr.Markdown("*Select an example to run VUS analysis*")
            for button, key in vus_buttons:
                button.click(fn=_example_runner(run_vus_analysis, key), outputs=vus_output)

        # Tab 2: TAD Detection
        with gr.TabItem("🔬 TAD Detection"):
            gr.Markdown(
                "## Topologically Associating Domain Detection\n\n"
                "See how TopoGrammar detects TAD boundaries with CTCF-gating "
                "for 91% accuracy."
            )
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Select Region")
                    tad_buttons = [
                        (gr.Button(label, variant="secondary"), key)
                        for label, key in _TAD_BUTTONS
                    ]
                with gr.Column(scale=3):
                    tad_output = gr.Markdown("*Select a region to analyze TAD structure*")
            for button, key in tad_buttons:
                button.click(fn=_example_runner(run_tad_analysis, key), outputs=tad_output)

        # Tab 3: Neo-Loop Detection
        with gr.TabItem("🧪 Neo-Loop Detection"):
            gr.Markdown(
                "## Cancer Neo-Loop & Enhancer Hijacking Detection\n\n"
                "Identify oncogene activation through structural variant-induced "
                "neo-loops."
            )
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Select Cancer Example")
                    neo_buttons = [
                        (gr.Button(label, variant="secondary"), key)
                        for label, key in _NEOLOOP_BUTTONS
                    ]
                with gr.Column(scale=3):
                    neo_output = gr.Markdown("*Select a cancer example to detect neo-loops*")
            for button, key in neo_buttons:
                button.click(fn=_example_runner(run_neoloop_analysis, key), outputs=neo_output)

        # Tab 4: Benchmarks
        with gr.TabItem("📊 Benchmarks"):
            gr.Markdown(show_benchmarks())

        # Tab 5: About
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(ABOUT_MD)

    gr.Markdown("---")
    # Blank lines inside <center> so the Markdown within it is still parsed.
    gr.Markdown(
        "<center>\n\n"
        "**TopoGrammar v2.1.0** | Part of **OmniPrime Enterprise Platform**\n\n"
        "[🌐 bioprime.one](https://bioprime.one) | "
        "[🧬 GitHub](https://github.com/Saifullah62/OmniPrime_v1.0) | "
        "[📧 Contact](mailto:info@bioprime.one)\n\n"
        "*This is a demonstration. Clinical use requires the full OmniPrime "
        "Enterprise Platform.*\n\n"
        "</center>"
    )

if __name__ == "__main__":
    demo.launch()