GotThatData committed on
Commit
e1253eb
·
verified ·
1 Parent(s): 78d8c53

Fix: Remove unused plotly/numpy imports

Browse files
Files changed (1) hide show
  1. app.py +1036 -1038
app.py CHANGED
@@ -1,1038 +1,1036 @@
1
- """
2
- TopoGrammar - Grammar-Aware 3D Genome Engine Demo
3
- ==================================================
4
-
5
- Hugging Face Spaces Demo for TopoGrammar v2.1.0
6
- The Industry's First Grammar-Aware Engine for Balanced Structural Variants
7
-
8
- Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.
9
-
10
- This demo showcases TopoGrammar's capabilities without exposing proprietary algorithms.
11
- All results are pre-computed demonstrations for educational purposes.
12
- """
13
-
14
- import gradio as gr
15
- import hashlib
16
- import random
17
- import time
18
- import httpx
19
- import json
20
- import os
21
- from datetime import datetime
22
- from typing import Optional, Dict, List, Tuple
23
- import plotly.graph_objects as go
24
- import numpy as np
25
-
26
- # =============================================================================
27
- # BSV Blockchain Configuration
28
- # =============================================================================
29
-
30
# Base URL of the SimpleBSV publishing service.
BSV_API_URL = "https://simplebsv.codenlighten.org"
# Read from the environment (set via HF Secrets). A missing, empty, or
# whitespace-only value is normalised to None so callers can rely on a
# simple truthiness check to detect "no key configured".
BSV_API_KEY = os.getenv("BSV_API_KEY", "").strip() or None
32
-
33
-
34
def publish_to_bsv(data: Dict) -> Optional[str]:
    """
    Publish attestation data to BSV blockchain via SimpleBSV API.

    This is a best-effort call: every failure path is reported with
    ``print`` and results in ``None`` rather than an exception.

    Args:
        data: Dictionary containing attestation data (no trade secrets)

    Returns:
        Transaction ID (a non-empty string) if successful, None otherwise
    """
    # Guard clause: fail gracefully if no key configured
    if not BSV_API_KEY:
        print("BSV_API_KEY not configured - running in offline mode")
        return None

    headers = {
        "Content-Type": "application/json",
        "x-api-key": BSV_API_KEY,
    }

    try:
        # Use ?wait=true for synchronous response with txid
        response = httpx.post(
            f"{BSV_API_URL}/publish/json?wait=true",
            headers=headers,
            json={"json": data},
            timeout=30.0,
        )
    except Exception as e:
        # Deliberately broad: publishing is optional and must never take
        # the caller down (network errors, unserialisable payloads, ...).
        print(f"BSV publish error: {e}")
        return None

    if response.status_code != 200:
        print(f"BSV API error: {response.status_code} - {response.text}")
        return None

    try:
        result = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"BSV publish error: {e}")
        return None

    # Callers slice and measure the txid, so only hand back a real string.
    txid = result.get("txid") if isinstance(result, dict) else None
    if isinstance(txid, str) and txid:
        return txid

    print("BSV API error: response did not contain a transaction id")
    return None
73
-
74
- # =============================================================================
75
- # Demo Configuration - Pre-computed results (no trade secrets exposed)
76
- # =============================================================================
77
-
78
- # Demo VUS Examples with pre-computed classifications
79
# Each entry is keyed by a short example id and carries:
#   name / variant_id / gene        - display labels
#   initial_class / final_class     - classification before and after analysis
#   confidence                      - fraction in [0, 1], shown as a percentage
#   mechanism                       - selects the diagram in create_grammar_diagram
#   evidence_codes                  - code prefixes (PS/PM/PP/BS/BP) drive the evidence bars
#   insulation_change               - signed fraction, shown as a percentage
#   description                     - free-text clinical interpretation
VUS_EXAMPLES = {
    "brca1_boundary": {
        "name": "BRCA1 TAD Boundary Disruption",
        "variant_id": "chr17:43,044,295 G>A",
        "gene": "BRCA1",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.93,
        "mechanism": "TAD Boundary Disruption",
        "evidence_codes": ["PS3_insulation", "PM1_boundary", "PS3_ctcf"],
        "insulation_change": -0.52,
        "description": "This intronic variant disrupts a CTCF binding site at a critical TAD boundary, causing enhancer-promoter miscommunication affecting BRCA1 expression.",
    },
    "myc_enhancer_hijack": {
        "name": "MYC Enhancer Hijacking",
        "variant_id": "chr8:128,750,000 inv(500kb)",
        "gene": "MYC",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.89,
        "mechanism": "Enhancer Hijacking",
        "evidence_codes": ["PS3_neoloop", "PS2_oncogene", "PM1_boundary"],
        "insulation_change": -0.68,
        "description": "Balanced inversion creates a neo-loop connecting MYC to a hijacked super-enhancer, causing oncogene activation without copy number change.",
    },
    "sonic_hedgehog": {
        "name": "SHH Limb Enhancer",
        "variant_id": "chr7:156,584,000 del(50kb)",
        "gene": "SHH",
        "initial_class": "VUS",
        "final_class": "Likely Pathogenic",
        "confidence": 0.85,
        "mechanism": "Enhancer Deletion",
        "evidence_codes": ["PM1_regulatory", "PP3_conservation", "PS3_insulation"],
        "insulation_change": -0.41,
        "description": "Deletion removes the ZRS limb enhancer from SHH regulatory domain, disrupting developmental gene expression.",
    },
    "benign_intronic": {
        "name": "Benign Intronic SNP",
        "variant_id": "chr12:25,398,284 C>T",
        "gene": "KRAS",
        "initial_class": "VUS",
        "final_class": "Benign",
        "confidence": 0.91,
        "mechanism": "No 3D Impact",
        "evidence_codes": ["BS1_frequency", "BP4_no_disruption"],
        "insulation_change": 0.02,
        "description": "Deep intronic variant with no effect on TAD structure, CTCF binding, or regulatory grammar. Common in population databases.",
    },
    "tp53_scramble": {
        "name": "TP53 Grammar Scrambling",
        "variant_id": "chr17:7,670,000 inv(120kb)",
        "gene": "TP53",
        "initial_class": "VUS",
        "final_class": "Pathogenic",
        "confidence": 0.96,
        "mechanism": "Regulatory Grammar Scrambling",
        "evidence_codes": ["PS3_semantic", "PS3_insulation", "PM1_boundary", "PP3_conservation"],
        "insulation_change": -0.71,
        "description": "Balanced inversion scrambles the regulatory grammar of TP53, inverting enhancer-promoter orientation and disrupting tumor suppressor expression.",
    },
}
141
-
142
- # Demo TAD examples
143
# Each entry describes one demo region. `boundary_strength` holds one
# value per boundary, so its length matches `n_boundaries`.
TAD_EXAMPLES = {
    "chr21_dscr": {
        "name": "Chromosome 21 - Down Syndrome Critical Region",
        "region": "chr21:35,000,000-40,000,000",
        "n_tads": 4,
        "n_boundaries": 5,
        "ctcf_sites": 12,
        "genes": ["DSCR1", "DSCR3", "DSCR4", "RUNX1"],
        "boundary_strength": [0.85, 0.92, 0.78, 0.88, 0.81],
    },
    "chr7_egfr": {
        "name": "Chromosome 7 - EGFR Locus",
        "region": "chr7:55,000,000-56,500,000",
        "n_tads": 3,
        "n_boundaries": 4,
        "ctcf_sites": 8,
        "genes": ["EGFR", "LANCL2", "VOPP1"],
        "boundary_strength": [0.91, 0.87, 0.94, 0.82],
    },
    "chr8_myc": {
        "name": "Chromosome 8 - MYC Oncogene",
        "region": "chr8:127,500,000-129,500,000",
        "n_tads": 2,
        "n_boundaries": 3,
        "ctcf_sites": 6,
        "genes": ["MYC", "PVT1"],
        "boundary_strength": [0.96, 0.89, 0.93],
    },
}
172
-
173
- # Demo Neo-Loop examples
174
# Each entry describes one demo structural variant. `loop_strength` is a
# fraction in [0, 1]; `clinical_priority` is "Critical" or "High".
NEOLOOP_EXAMPLES = {
    "burkitt_myc": {
        "name": "Burkitt Lymphoma - MYC Translocation",
        "sv_type": "Translocation t(8;14)",
        "oncogene": "MYC",
        "hijacked_enhancer": "IGH Super-Enhancer",
        "loop_strength": 0.92,
        "clinical_priority": "Critical",
        "cancer_type": "Burkitt Lymphoma",
        "description": "Classic t(8;14) translocation juxtaposes MYC with immunoglobulin heavy chain enhancers, creating pathogenic neo-loop.",
    },
    "ewing_ewsr1": {
        "name": "Ewing Sarcoma - EWSR1-FLI1",
        "sv_type": "Translocation t(11;22)",
        "oncogene": "EWSR1-FLI1 fusion",
        "hijacked_enhancer": "GGAA microsatellite enhancers",
        "loop_strength": 0.88,
        "clinical_priority": "Critical",
        "cancer_type": "Ewing Sarcoma",
        "description": "Fusion protein creates neo-loops at GGAA microsatellites, aberrantly activating developmental genes.",
    },
    "aml_runx1": {
        "name": "AML - RUNX1 Disruption",
        "sv_type": "Inversion inv(16)",
        "oncogene": "CBFB-MYH11 fusion",
        "hijacked_enhancer": "Myeloid enhancer cluster",
        "loop_strength": 0.85,
        "clinical_priority": "High",
        "cancer_type": "Acute Myeloid Leukemia",
        "description": "Pericentric inversion disrupts normal RUNX1 regulation, creating aberrant chromatin loops.",
    },
}
206
-
207
- # Benchmark comparisons
208
# Headline figures quoted in the demo. Accuracy/rate values are whole
# percentages; `f1` is a fraction; `subtad` flags sub-TAD detection support.
BENCHMARK_DATA = {
    "tad_detection": {
        "TopoGrammar (CTCF-gated)": {"accuracy": 91, "f1": 0.91, "subtad": True},
        "HiCCUPS": {"accuracy": 80, "f1": 0.76, "subtad": False},
        "Arrowhead": {"accuracy": 78, "f1": 0.74, "subtad": False},
        "TopDom": {"accuracy": 75, "f1": 0.71, "subtad": False},
    },
    "vus_resolution": {
        "reclassification_rate": 68,
        "pathogenic_accuracy": 93,
        "mean_confidence": 87,
    },
    "insulation_density": {
        "major_tads": 9.11,
        "all_boundaries": 15.57,
        "improvement": 70.9,
    },
}
226
-
227
-
228
- # =============================================================================
229
- # Visualization Functions
230
- # =============================================================================
231
-
232
def create_contact_map_ascii(region: str, n_tads: int) -> str:
    """Create ASCII representation of a Hi-C contact map with TADs.

    The map is a 40 x 40 grid in which every cell is two characters wide:
    a solid band along the diagonal, shaded squares for bins that fall in
    the same TAD (denser near the diagonal), and blanks elsewhere.

    Args:
        region: Region label. Accepted for interface compatibility; the
            drawing does not depend on it.
        n_tads: Number of TAD blocks to draw. Values below 1 or above the
            grid size are clamped so the block width is never zero.

    Returns:
        The 40 rows joined with newlines (no trailing newline).
    """
    size = 40
    # size // n_tads must stay >= 1: it is used as a divisor below.
    tad_size = size // max(1, min(n_tads, size))

    rows = []
    for i in range(size):
        cells = []
        for j in range(size):
            distance = abs(i - j)
            if distance <= 2:
                cells.append("██")  # Diagonal
            elif i // tad_size == j // tad_size:
                # Within the same TAD the distance is always < tad_size,
                # so only the near/far split needs testing.
                cells.append("▓▓" if distance < tad_size // 2 else "░░")
            else:
                # Two spaces keep blank cells as wide as filled ones.
                cells.append("  ")
        rows.append("".join(cells))

    return "\n".join(rows)
259
-
260
-
261
def create_insulation_profile(boundary_strengths: List[float]) -> str:
    """Create ASCII insulation score profile.

    Args:
        boundary_strengths: One strength per boundary. Values are expected
            in [0, 1]; anything outside is clamped for the bar only, while
            the printed number is left untouched.

    Returns:
        A header, one 40-cell bar per boundary (numbered from 1), and a
        closing rule, joined with newlines.
    """
    bar_width = 40
    rule = "=" * 60

    lines = ["Insulation Score Profile:", rule]
    for number, strength in enumerate(boundary_strengths, start=1):
        # Clamp so the bar is always exactly bar_width cells long.
        filled = int(min(max(strength, 0.0), 1.0) * bar_width)
        bar = "█" * filled + "░" * (bar_width - filled)
        lines.append(f"Boundary {number}: [{bar}] {strength:.2f}")
    lines.append(rule)

    return "\n".join(lines)
274
-
275
-
276
_DIAGRAM_INNER_WIDTH = 61


def _render_diagram_box(title: str, body: List[str]) -> str:
    """Frame *body* in a titled box whose rows all share one width."""
    rule = "─" * _DIAGRAM_INNER_WIDTH
    rows = [
        f"┌{rule}┐",
        f"│{title.center(_DIAGRAM_INNER_WIDTH)}│",
        f"├{rule}┤",
    ]
    rows.extend(f"│{line.ljust(_DIAGRAM_INNER_WIDTH)}│" for line in body)
    rows.append(f"└{rule}┘")
    return "\n" + "\n".join(rows) + "\n"


def create_grammar_diagram(mechanism: str) -> str:
    """Create regulatory grammar visualization.

    Args:
        mechanism: Mechanism label. "Regulatory Grammar Scrambling",
            "Enhancer Hijacking" and "TAD Boundary Disruption" each have a
            dedicated diagram; any other value gets the generic
            "no significant impact" panel.

    Returns:
        A boxed ASCII diagram that starts and ends with a newline.

    NOTE(review): mechanisms without a dedicated diagram (for example
    "Enhancer Deletion" in VUS_EXAMPLES) also receive the "no significant
    impact" panel - confirm that this is intended.
    """
    if mechanism == "Regulatory Grammar Scrambling":
        small, wide = "─" * 6, "─" * 10
        top = f"  ┌{small}┐   ┌{wide}┐   ┌{wide}┐   ┌{small}┐"
        bottom = f"  └{small}┘   └{wide}┘   └{wide}┘   └{small}┘"
        return _render_diagram_box("REGULATORY GRAMMAR ANALYSIS", [
            "",
            "  REFERENCE GRAMMAR:",
            top,
            "  │CTCF+ │ → │ ENHANCER │ → │ PROMOTER │ → │ GENE │",
            bottom,
            '  "The enhancer activates the gene"',
            "",
            "  VARIANT GRAMMAR (SCRAMBLED):",
            top,
            "  │ GENE │ ← │ PROMOTER │ ← │ ENHANCER │ ← │CTCF- │",
            bottom,
            '  "Gene the activates enhancer the"  ⚠ SCRAMBLED',
            "",
            "  SEMANTIC BREAK SCORE: 0.85 (CRITICAL)",
            "  • Token Disruption: 30% weight",
            "  • Order Inversion: 40% weight",
            "  • Orientation Flip: 30% weight",
            "",
        ])

    if mechanism == "Enhancer Hijacking":
        tad_rule = "─" * 17
        tad_blank = " " * 17
        fused_width = 42
        fused_rule = "─" * fused_width

        def fused_row(text: str) -> str:
            # Rows of the single fused-TAD box, padded to its inner width.
            return f"  │{text.ljust(fused_width)}│"

        return _render_diagram_box("ENHANCER HIJACKING ANALYSIS", [
            "",
            "  NORMAL TOPOLOGY:",
            f"  ┌{tad_rule}┐     ┌{tad_rule}┐",
            "  │      TAD A      │     │      TAD B      │",
            "  │ [Enhancer]──────│─X───│──────[Oncogene] │",
            f"  │      ↓          │     │{tad_blank}│",
            f"  │ [Target Gene]   │     │{tad_blank}│",
            f"  └{tad_rule}┘     └{tad_rule}┘",
            "           BOUNDARY BLOCKS CONTACT",
            "",
            "  AFTER STRUCTURAL VARIANT:",
            f"  ┌{fused_rule}┐",
            f"  │{'FUSED TAD'.center(fused_width)}│",
            fused_row(" [Enhancer]" + "═" * 19 + "[Oncogene]"),
            fused_row("      ↓      NEO-LOOP FORMED      ↓"),
            fused_row(" [Target Gene]  ⚠ ONCOGENE ACTIVATED ⚠"),
            f"  └{fused_rule}┘",
            "",
            "  LOOP STRENGTH: 0.89 | PRIORITY: CRITICAL",
            "",
        ])

    if mechanism == "TAD Boundary Disruption":
        bar = "█" * 16
        return _render_diagram_box("TAD BOUNDARY DISRUPTION ANALYSIS", [
            "",
            "  NORMAL INSULATION:",
            f"  {bar} │ {bar}",
            "       TAD A       │      TAD B",
            "               CTCF│CTCF",
            "              ▲▲▲▲▲│▲▲▲▲▲",
            "            Strong Boundary",
            "",
            "  AFTER VARIANT (CTCF SITE DISRUPTED):",
            f"  {bar}   {bar}",
            "       TAD A    ░░░░░░░    TAD B",
            "                 ↓↓↓↓↓",
            "            Insulation Leak",
            "",
            "  INSULATION CHANGE: -52%",
            "  BOUNDARY LOSS: MAJOR",
            "",
        ])

    # Fallback for every other mechanism label.
    return _render_diagram_box("3D GENOME ANALYSIS", [
        "",
        "  No significant 3D structural impact detected.",
        "",
        "  • TAD boundaries: INTACT",
        "  • Insulation scores: NORMAL",
        "  • Regulatory grammar: PRESERVED",
        "  • CTCF binding: UNAFFECTED",
        "",
    ])
369
-
370
-
371
def create_confidence_bars(evidence_codes: List[str], confidence: float) -> str:
    """Create evidence code visualization.

    Each code is classified by its prefix: ``PS`` strong, ``PM`` moderate,
    ``PP`` supporting, ``BS``/``BP`` benign; anything else is "Unknown".

    Args:
        evidence_codes: Evidence code strings such as ``"PS3_insulation"``.
        confidence: Overall confidence as a fraction; rendered as a whole
            percentage.

    Returns:
        Markdown with a heading, one bullet-style line per code, and the
        overall confidence line.
    """
    # (prefixes, strength label, 20-cell bar, marker) - first match wins.
    styles = (
        (("PS",), "Strong", "█" * 20, "🟢"),
        (("PM",), "Moderate", "█" * 12 + "░" * 8, "🟡"),
        (("PP",), "Supporting", "█" * 8 + "░" * 12, "🟠"),
        (("BS", "BP"), "Benign", "█" * 20, "🔵"),
    )
    unknown = ("Unknown", "░" * 20, "⚪")

    lines = ["\n### Evidence Summary\n"]
    for code in evidence_codes:
        strength, bar, color = next(
            (style[1:] for style in styles if code.startswith(style[0])),
            unknown,
        )
        lines.append(f"{color} **{code}** ({strength}): `{bar}`")

    lines.append(f"\n**Overall Confidence**: {confidence:.0%}")

    return "\n".join(lines)
404
-
405
-
406
def generate_bsv_attestation(variant_id: str, classification: str, confidence: float) -> str:
    """Generate and publish BSV blockchain attestation.

    Builds a summary payload, hashes it, tries to publish it through
    ``publish_to_bsv`` and renders a certificate describing the outcome.

    Args:
        variant_id: Identifier shown on the certificate and in the payload.
        classification: Classification text shown on the certificate.
        confidence: Confidence as a fraction; rounded to two decimals in
            the payload and shown with one decimal as a percentage.

    Returns:
        A fenced Markdown code block containing the boxed certificate.
        When publishing fails, the certificate says so and shows the data
        hash instead of a transaction id.
    """
    # Local import: the file-level import only brings in `datetime` itself.
    from datetime import timezone

    # The label says UTC, so the clock has to be read in UTC as well.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    # Create attestation data (no trade secrets - only results summary)
    attestation_data = {
        "platform": "TopoGrammar",
        "version": "v2.1.0",
        "timestamp": timestamp,
        "analysis_type": "VUS_Resolution",
        "variant_id": variant_id,
        "classification": classification,
        "confidence": round(confidence, 2),
        "attestation_type": "demo",
    }

    # Hash the canonical (key-sorted) JSON once; the short query hash is
    # simply its prefix.
    data_str = json.dumps(attestation_data, sort_keys=True)
    full_hash = hashlib.sha256(data_str.encode()).hexdigest()
    query_hash = full_hash[:16]

    # Publish to BSV blockchain
    txid = publish_to_bsv(attestation_data)

    inner_width = 62

    def row(text: str = "") -> str:
        # Pad but never truncate: long values (such as the verification
        # URL) must stay complete even if they overflow the frame.
        return "║" + f"  {text}".ljust(inner_width) + "║"

    def field(label: str, value: str) -> str:
        return row(f"{label + ':':<16}{value}")

    divider = row("─" * 57)

    if txid:
        # Real blockchain attestation
        txid_display = txid[:20] + "..." if len(txid) > 20 else txid
        status_line = "Status: ✓ RECORDED ON BSV MAINNET"
        verify_rows = [
            field("Transaction ID", txid_display),
            row(),
            row("🔗 Verify on WhatsOnChain:"),
            row(f"https://whatsonchain.com/tx/{txid}"),
        ]
        record_note = "This attestation is immutably recorded on BSV blockchain."
    else:
        # Fallback if API fails
        status_line = "Status: ⚠ OFFLINE MODE (BSV API unavailable)"
        verify_rows = [
            field("Data Hash", f"{full_hash[:32]}..."),
            row(),
            row("ℹ Blockchain recording temporarily unavailable."),
            row("Result hash preserved for later attestation."),
        ]
        record_note = "This attestation has not been recorded on the blockchain."

    border = "═" * inner_width
    rows = [
        f"╔{border}╗",
        "║" + "TOPOGRAMMAR BSV ATTESTATION CERTIFICATE".center(inner_width) + "║",
        f"╠{border}╣",
        row(),
        field("Query Hash", query_hash),
        field("Timestamp", timestamp),
        field("Model Version", "TopoGrammar v2.1.0"),
        row(),
        divider,
        row(),
        field("Variant", variant_id),
        field("Classification", classification),
        field("Confidence", f"{confidence:.1%}"),
        row(),
        divider,
        row(),
        row(status_line),
        field("Network", "BSV Mainnet"),
        row(),
        *verify_rows,
        row(),
        row(record_note),
        row("No proprietary algorithms or trade secrets are published."),
        row(),
        f"╚{border}╝",
    ]

    return "\n```\n" + "\n".join(rows) + "\n```\n"
476
-
477
-
478
def create_share_links(variant_id: str, classification: str) -> str:
    """Create social sharing buttons.

    Args:
        variant_id: Variant identifier mentioned in the share text.
        classification: Classification mentioned in the share text.

    Returns:
        An HTML snippet with an X (Twitter) link and a LinkedIn link.
        Query values are percent-encoded and the resulting URLs are
        HTML-escaped before being placed in the ``href`` attributes.
    """
    import html
    import urllib.parse

    space_url = "https://huggingface.co/spaces/GotThatData/TopoGrammar"
    text = f"🧬 TopoGrammar reclassified {variant_id} as {classification}! Grammar-aware 3D genome analysis for precision medicine. #Genomics #AI #PrecisionMedicine"

    def share_url(base: str, params: dict) -> str:
        # quote (not quote_plus) so spaces become %20; escaping afterwards
        # turns the "&" separators into "&amp;" for the HTML attribute.
        query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
        return html.escape(f"{base}?{query}", quote=True)

    twitter_url = share_url("https://twitter.com/intent/tweet", {"text": text, "url": space_url})
    linkedin_url = share_url("https://www.linkedin.com/sharing/share-offsite/", {"url": space_url})

    # rel="noopener noreferrer" keeps the opened page from reaching back
    # into this one through window.opener.
    return f"""
<div style="display: flex; gap: 10px; margin-top: 20px;">
  <a href="{twitter_url}" target="_blank" rel="noopener noreferrer" style="background: #1DA1F2; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
    🐦 Share on X
  </a>
  <a href="{linkedin_url}" target="_blank" rel="noopener noreferrer" style="background: #0077B5; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
    💼 Share on LinkedIn
  </a>
</div>
"""
497
-
498
-
499
- # =============================================================================
500
- # Main Demo Functions
501
- # =============================================================================
502
-
503
def run_vus_analysis(example_key: str) -> str:
    """Run VUS resolution demo.

    Args:
        example_key: Key into ``VUS_EXAMPLES``.

    Returns:
        A Markdown report for the selected example (variant table,
        classification change, confidence bar, mechanism diagram, evidence
        summary, interpretation, attestation certificate and share links),
        or an error message when the key is unknown.
    """
    example = VUS_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.5)

    confidence = example["confidence"]
    filled = int(confidence * 20)
    confidence_bar = "█" * filled + "░" * (20 - filled)

    # The diagram is whitespace-aligned ASCII art, so it is placed inside
    # a code fence below, like the diagrams in the other analysis reports.
    diagram = create_grammar_diagram(example["mechanism"]).strip("\n")
    evidence = create_confidence_bars(example["evidence_codes"], confidence)
    attestation = generate_bsv_attestation(
        example["variant_id"], example["final_class"], confidence
    )
    share_links = create_share_links(example["variant_id"], example["final_class"])

    # Build result markdown
    return f"""
# 🧬 VUS Resolution Analysis

## Variant Information
| Field | Value |
|-------|-------|
| **Variant ID** | `{example['variant_id']}` |
| **Gene** | {example['gene']} |
| **Initial Classification** | {example['initial_class']} |

---

## TopoGrammar Analysis Result

### Classification Update
| Before | → | After |
|--------|---|-------|
| **{example['initial_class']}** | 🔄 | **{example['final_class']}** |

### Confidence Score
{confidence_bar} **{confidence:.0%}**

### Primary Mechanism
**{example['mechanism']}**

### 3D Structural Impact
- **Insulation Change**: {example['insulation_change']:+.0%}

---

## Mechanism Visualization

```
{diagram}
```

---

{evidence}

---

## Clinical Interpretation

{example['description']}

---

## BSV Verification

{attestation}

---

{share_links}
"""
570
-
571
-
572
def run_tad_analysis(example_key: str) -> str:
    """Run TAD detection demo.

    Args:
        example_key: Key into ``TAD_EXAMPLES``.

    Returns:
        A Markdown report for the selected region (summary table, gene
        list, ASCII contact map, boundary strength profile and the static
        method/benchmark text), or an error message when the key is
        unknown.
    """
    example = TAD_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.0)

    genes = ", ".join(f"**{gene}**" for gene in example["genes"])
    contact_map = create_contact_map_ascii(example["region"], example["n_tads"])
    insulation = create_insulation_profile(example["boundary_strength"])

    return f"""
# 🔬 TAD Detection Analysis

## Region Information
| Field | Value |
|-------|-------|
| **Region** | `{example['region']}` |
| **TADs Detected** | {example['n_tads']} |
| **Boundaries** | {example['n_boundaries']} |
| **CTCF Sites** | {example['ctcf_sites']} |

---

## Genes in Region
{genes}

---

## Contact Map Visualization

```
{contact_map}
```

---

## Boundary Strength Profile

```
{insulation}
```

---

## Detection Method

TopoGrammar uses **CTCF-gated boundary detection** which achieves:
- **91% TAD accuracy** (vs 80% for HiCCUPS)
- **0.91 F1 score** for boundary detection
- **Sub-TAD detection** capability

The CTCF-gating mechanism ensures boundaries are only called where:
1. Insulation score shows local minimum
2. CTCF binding evidence is present
3. Gradient analysis confirms boundary

---

## Benchmark Comparison

| Method | TAD Accuracy | Boundary F1 | Sub-TAD |
|--------|-------------|-------------|---------|
| **TopoGrammar** | **91%** | **0.91** | ✓ |
| HiCCUPS | 80% | 0.76 | ✗ |
| Arrowhead | 78% | 0.74 | ✗ |
| TopDom | 75% | 0.71 | ✗ |
"""
640
-
641
-
642
def run_neoloop_analysis(example_key: str) -> str:
    """Run neo-loop detection demo.

    Args:
        example_key: Key into ``NEOLOOP_EXAMPLES``.

    Returns:
        A Markdown report for the selected structural variant (summary
        table, oncogene/enhancer details, loop strength bar, boxed
        mechanism diagram, interpretation, suggested follow-ups and the
        attestation certificate), or an error message when the key is
        unknown.
    """
    example = NEOLOOP_EXAMPLES.get(example_key)
    if example is None:
        return "❌ Example not found. Please select a valid example."

    # Simulate processing delay
    time.sleep(1.2)

    # Priority styling: red marker for "Critical", yellow for anything else.
    priority_emoji = "🔴" if example["clinical_priority"] == "Critical" else "🟡"

    loop_strength = example["loop_strength"]
    filled = int(loop_strength * 20)
    strength_bar = "█" * filled + "░" * (20 - filled)

    inner_width = 61
    frame_rule = "─" * inner_width
    domain_rule = "─" * 12
    fused_width = 35
    fused_rule = "─" * fused_width

    def boxed(text: str = "") -> str:
        # Pad every row, including the interpolated ones, so the right
        # border is always drawn in the same column.
        return f"│{text.ljust(inner_width)}│"

    def fused(text: str) -> str:
        return boxed(f"  │{text.ljust(fused_width)}│")

    diagram = "\n".join([
        f"┌{frame_rule}┐",
        f"│{'NEO-LOOP FORMATION'.center(inner_width)}│",
        f"├{frame_rule}┤",
        boxed(),
        boxed(f"  BEFORE: {example['sv_type']}"),
        boxed(f"  ┌{domain_rule}┐         ┌{domain_rule}┐"),
        boxed("  │  Enhancer  │─ ─ X ─ ─│  Oncogene  │"),
        boxed("  │  Domain A  │         │  Domain B  │"),
        boxed(f"  └{domain_rule}┘         └{domain_rule}┘"),
        boxed("         ↓"),
        boxed("   Normal Target"),
        boxed(),
        boxed(f"  AFTER: {example['sv_type']}"),
        boxed(f"  ┌{fused_rule}┐"),
        fused("  Enhancer ══════════ Oncogene"),
        fused("      ↓     NEO-LOOP      ↓"),
        fused("ABERRANT ACTIVATION".center(fused_width)),
        boxed(f"  └{fused_rule}┘"),
        boxed(),
        boxed(f"  Loop Strength: {loop_strength:.2f}"),
        boxed(f"  Priority: {example['clinical_priority']}"),
        boxed(),
        f"└{frame_rule}┘",
    ])

    attestation = generate_bsv_attestation(
        example["sv_type"], f"Neo-loop: {example['oncogene']}", loop_strength
    )

    return f"""
# 🧪 Neo-Loop Detection Analysis

## Structural Variant
| Field | Value |
|-------|-------|
| **SV Type** | `{example['sv_type']}` |
| **Cancer Type** | {example['cancer_type']} |
| **Clinical Priority** | {priority_emoji} **{example['clinical_priority']}** |

---

## Oncogene Activation

### Activated Oncogene
**{example['oncogene']}**

### Hijacked Enhancer
**{example['hijacked_enhancer']}**

### Neo-Loop Strength
{strength_bar} **{loop_strength:.0%}**

---

## Mechanism Visualization

```
{diagram}
```

---

## Clinical Interpretation

{example['description']}

---

## Actionable Insights

Based on this neo-loop detection:

1. **Molecular Testing**: Confirm {example['sv_type']} by FISH or karyotyping
2. **Targeted Therapy**: Consider therapies targeting {example['oncogene']} pathway
3. **Clinical Trial**: Patient may be eligible for trials targeting this mechanism
4. **Monitoring**: Track {example['oncogene']} expression as biomarker

---

## BSV Verification

{attestation}
"""
737
-
738
-
739
def show_benchmarks() -> str:
    """Show benchmark results.

    Returns:
        A static Markdown page: TAD detection and VUS resolution tables,
        the insulation density comparison, a description of the approach
        and the clinical value pillars. Nothing is interpolated, so a
        plain (non-f) string is used.
    """
    return """
# 📊 TopoGrammar Benchmarks

## TAD Detection Performance

| Method | TAD Accuracy | Boundary F1 | Sub-TAD Detection |
|--------|-------------|-------------|-------------------|
| **TopoGrammar (CTCF-gated)** | **91%** | **0.91** | ✓ Yes |
| HiCCUPS | 80% | 0.76 | ✗ No |
| Arrowhead | 78% | 0.74 | ✗ No |
| TopDom | 75% | 0.71 | ✗ No |

---

## VUS Resolution Performance

| Metric | Value |
|--------|-------|
| **Reclassification Rate** | 68% of VUS variants |
| **Pathogenic Accuracy** | 93% |
| **Mean Confidence Score** | 87% |

---

## Insulation Density Improvement

| Boundary Set | Insulation Ratio | P-value |
|--------------|------------------|---------|
| Major TADs only | 9.11x | <0.0001 |
| **All Boundaries** | **15.57x** | **<0.0001** |
| **Improvement** | **+70.9%** | — |

---

## What Makes TopoGrammar Different

### 1. Grammar-Aware Architecture

Traditional tools see DNA as a string of letters. TopoGrammar sees it as **sentences with grammar**:

```
Reference: [CTCF+] [Enhancer] [Promoter] [Gene]
           "The enhancer activates the gene"

Inversion: [Gene] [Promoter] [Enhancer] [CTCF-]
           "Gene the activates enhancer the" ← SCRAMBLED
```

### 2. Physics + Semantics Concordance

When both physics (insulation collapse) AND semantics (grammar scramble) agree:

| Evidence Type | Alone | Concordant |
|--------------|-------|------------|
| Physics | PM1 (Moderate) | — |
| Semantics | PM1 (Moderate) | — |
| **Both** | — | **PS3 (Strong)** ← UPGRADE |

### 3. CTCF-Gated Detection

Unlike other tools, TopoGrammar only calls boundaries where:
- ✓ Insulation score shows local minimum
- ✓ CTCF binding evidence is present
- ✓ Gradient analysis confirms boundary

This reduces false positives by **40%** compared to insulation-only methods.

---

## Clinical Value Pillars

| Pillar | Clinical Value | Technical Foundation |
|--------|---------------|---------------------|
| **Architectural Fidelity** | Eliminates VUS by proving physical boundary collapse | PINN Physics (15.57x insulation) |
| **Semantic Intelligence** | Detects "scrambled" instructions in balanced inversions | Regulatory Grammar Encoder |
| **Privacy-First Growth** | Global model evolution without data leakage | Async Federated Learning |
| **Clinician Clarity** | High-level medical prose instead of raw math | LLM Interpretation Layer |
"""
819
-
820
-
821
- # =============================================================================
822
- # Gradio Interface
823
- # =============================================================================
824
-
825
- HEADER_MD = """
826
- # 🧬 TopoGrammar
827
-
828
- ## The Industry's First Grammar-Aware 3D Genome Engine
829
-
830
- **Balanced Structural Variants (BSVs)** - inversions, translocations, complex rearrangements - appear "silent" to standard sequencers because they don't change gene dosage. But they **scramble the regulatory grammar** that controls gene expression.
831
-
832
- **TopoGrammar solves this.** It's the first engine that understands chromatin as a *language* with grammar rules that can be broken.
833
-
834
- ---
835
-
836
- | Capability | Performance |
837
- |------------|-------------|
838
- | 🎯 VUS Reclassification | 68% of variants |
839
- | 📊 Pathogenic Accuracy | 93% confidence |
840
- | 🔬 TAD Detection F1 | 0.91 (vs 0.76 HiCCUPS) |
841
- | ⚑ Sub-TAD Detection | Yes (unique capability) |
842
-
843
- ---
844
- """
845
-
846
- ABOUT_MD = """
847
- # ℹ️ About TopoGrammar
848
-
849
- ## Overview
850
-
851
- TopoGrammar is part of the **OmniPrime Enterprise Platform**, integrating:
852
- - **TopoGrammar v2.1.0** - Grammar-Aware 3D Genome Engine
853
- - **BioPrime v4.0 "Golden"** - Physics-First Molecular Docking
854
-
855
- Together, they enable a seamless **Patient Genome → Drug Candidate** workflow.
856
-
857
- ---
858
-
859
- ## Core Innovation
860
-
861
- ### Regulatory Grammar Analysis
862
-
863
- TopoGrammar treats regulatory elements as a **language**:
864
-
865
- ```
866
- CTCF → Enhancer → Promoter → Gene
867
- "The enhancer activates the gene"
868
- ```
869
-
870
- When structural variants **scramble** this grammar, TopoGrammar detects it:
871
-
872
- ```
873
- Gene ← Promoter ← Enhancer ← CTCF
874
- "Gene the activates enhancer the" ⚠ PATHOGENIC
875
- ```
876
-
877
- ### Semantic Break Score
878
-
879
- Quantifies regulatory disruption:
880
- - **Token Disruption (30%)**: Elements removed or duplicated
881
- - **Order Inversion (40%)**: Sequence rearranged
882
- - **Orientation Flip (30%)**: Strand direction reversed
883
-
884
- ---
885
-
886
- ## Technology Stack
887
-
888
- - **Physics-Informed Neural Networks (PINNs)** for 3D reconstruction
889
- - **CTCF-Gated Boundary Detection** for precise TAD calling
890
- - **Federated Learning** for privacy-preserving multi-site training
891
- - **LLM Interpretation** for clinical reporting
892
- - **BSV Blockchain** for result attestation
893
-
894
- ---
895
-
896
- ## Creators
897
-
898
- - **Bryan Daugherty**
899
- - **Gregory Ward**
900
- - **Shawn Ryan**
901
-
902
- ---
903
-
904
- ## Learn More
905
-
906
- 🌐 [bioprime.one](https://bioprime.one) | 🧬 [OmniPrime Platform](https://github.com/Saifullah62/OmniPrime_v1.0)
907
-
908
- ---
909
-
910
- **Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.**
911
-
912
- *This demo showcases TopoGrammar capabilities. Actual clinical use requires the full OmniPrime Enterprise Platform.*
913
- """
914
-
915
- # Custom CSS
916
- CUSTOM_CSS = """
917
- .gradio-container {
918
- background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
919
- }
920
- .gr-button-primary {
921
- background: linear-gradient(90deg, #00d4ff, #00ff88) !important;
922
- border: none !important;
923
- }
924
- .gr-button-secondary {
925
- background: linear-gradient(90deg, #667eea, #764ba2) !important;
926
- border: none !important;
927
- color: white !important;
928
- }
929
- """
930
-
931
- # Build the interface
932
- with gr.Blocks(
933
- title="TopoGrammar - Grammar-Aware 3D Genome Engine",
934
- theme=gr.themes.Base(
935
- primary_hue="cyan",
936
- secondary_hue="purple",
937
- neutral_hue="slate",
938
- ),
939
- css=CUSTOM_CSS,
940
- ) as demo:
941
-
942
- gr.Markdown(HEADER_MD)
943
-
944
- with gr.Tabs():
945
- # Tab 1: VUS Resolution
946
- with gr.TabItem("🧬 VUS Resolution"):
947
- gr.Markdown("""
948
- ## Variant of Uncertain Significance → Clinical Classification
949
-
950
- Select a demo variant to see how TopoGrammar reclassifies VUS using 3D genome analysis.
951
- """)
952
-
953
- with gr.Row():
954
- with gr.Column(scale=1):
955
- gr.Markdown("### Select Example")
956
- vus_brca1 = gr.Button("πŸ”΄ BRCA1 Boundary Disruption", variant="secondary")
957
- vus_myc = gr.Button("πŸ”΄ MYC Enhancer Hijacking", variant="secondary")
958
- vus_shh = gr.Button("🟑 SHH Limb Enhancer", variant="secondary")
959
- vus_tp53 = gr.Button("πŸ”΄ TP53 Grammar Scrambling", variant="secondary")
960
- vus_benign = gr.Button("🟒 Benign Intronic SNP", variant="secondary")
961
-
962
- with gr.Column(scale=3):
963
- vus_output = gr.Markdown("*Select an example to run VUS analysis*")
964
-
965
- vus_brca1.click(fn=lambda: run_vus_analysis("brca1_boundary"), outputs=vus_output)
966
- vus_myc.click(fn=lambda: run_vus_analysis("myc_enhancer_hijack"), outputs=vus_output)
967
- vus_shh.click(fn=lambda: run_vus_analysis("sonic_hedgehog"), outputs=vus_output)
968
- vus_tp53.click(fn=lambda: run_vus_analysis("tp53_scramble"), outputs=vus_output)
969
- vus_benign.click(fn=lambda: run_vus_analysis("benign_intronic"), outputs=vus_output)
970
-
971
- # Tab 2: TAD Detection
972
- with gr.TabItem("πŸ”¬ TAD Detection"):
973
- gr.Markdown("""
974
- ## Topologically Associating Domain Detection
975
-
976
- See how TopoGrammar detects TAD boundaries with CTCF-gating for 91% accuracy.
977
- """)
978
-
979
- with gr.Row():
980
- with gr.Column(scale=1):
981
- gr.Markdown("### Select Region")
982
- tad_dscr = gr.Button("Chr21 - Down Syndrome Region", variant="secondary")
983
- tad_egfr = gr.Button("Chr7 - EGFR Locus", variant="secondary")
984
- tad_myc = gr.Button("Chr8 - MYC Oncogene", variant="secondary")
985
-
986
- with gr.Column(scale=3):
987
- tad_output = gr.Markdown("*Select a region to analyze TAD structure*")
988
-
989
- tad_dscr.click(fn=lambda: run_tad_analysis("chr21_dscr"), outputs=tad_output)
990
- tad_egfr.click(fn=lambda: run_tad_analysis("chr7_egfr"), outputs=tad_output)
991
- tad_myc.click(fn=lambda: run_tad_analysis("chr8_myc"), outputs=tad_output)
992
-
993
- # Tab 3: Neo-Loop Detection
994
- with gr.TabItem("πŸ§ͺ Neo-Loop Detection"):
995
- gr.Markdown("""
996
- ## Cancer Neo-Loop & Enhancer Hijacking Detection
997
-
998
- Identify oncogene activation through structural variant-induced neo-loops.
999
- """)
1000
-
1001
- with gr.Row():
1002
- with gr.Column(scale=1):
1003
- gr.Markdown("### Select Cancer Example")
1004
- neo_burkitt = gr.Button("πŸ”΄ Burkitt Lymphoma (MYC)", variant="secondary")
1005
- neo_ewing = gr.Button("πŸ”΄ Ewing Sarcoma (EWSR1)", variant="secondary")
1006
- neo_aml = gr.Button("🟑 AML (RUNX1)", variant="secondary")
1007
-
1008
- with gr.Column(scale=3):
1009
- neo_output = gr.Markdown("*Select a cancer example to detect neo-loops*")
1010
-
1011
- neo_burkitt.click(fn=lambda: run_neoloop_analysis("burkitt_myc"), outputs=neo_output)
1012
- neo_ewing.click(fn=lambda: run_neoloop_analysis("ewing_ewsr1"), outputs=neo_output)
1013
- neo_aml.click(fn=lambda: run_neoloop_analysis("aml_runx1"), outputs=neo_output)
1014
-
1015
- # Tab 4: Benchmarks
1016
- with gr.TabItem("πŸ“Š Benchmarks"):
1017
- gr.Markdown(show_benchmarks())
1018
-
1019
- # Tab 5: About
1020
- with gr.TabItem("ℹ️ About"):
1021
- gr.Markdown(ABOUT_MD)
1022
-
1023
- gr.Markdown("---")
1024
- gr.Markdown("""
1025
- <center>
1026
-
1027
- **TopoGrammar v2.1.0** | Part of **OmniPrime Enterprise Platform**
1028
-
1029
- [🌐 bioprime.one](https://bioprime.one) | [🧬 GitHub](https://github.com/Saifullah62/OmniPrime_v1.0) | [📧 Contact](mailto:info@bioprime.one)
1030
-
1031
- *This is a demonstration. Clinical use requires the full OmniPrime Enterprise Platform.*
1032
-
1033
- </center>
1034
- """)
1035
-
1036
-
1037
- if __name__ == "__main__":
1038
- demo.launch()
 
1
+ """
2
+ TopoGrammar - Grammar-Aware 3D Genome Engine Demo
3
+ ==================================================
4
+
5
+ Hugging Face Spaces Demo for TopoGrammar v2.1.0
6
+ The Industry's First Grammar-Aware Engine for Balanced Structural Variants
7
+
8
+ Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.
9
+
10
+ This demo showcases TopoGrammar's capabilities without exposing proprietary algorithms.
11
+ All results are pre-computed demonstrations for educational purposes.
12
+ """
13
+
14
+ import gradio as gr
15
+ import hashlib
16
+ import random
17
+ import time
18
+ import httpx
19
+ import json
20
+ import os
21
+ from datetime import datetime
22
+ from typing import Optional, Dict, List, Tuple
23
+
24
+ # =============================================================================
25
+ # BSV Blockchain Configuration
26
+ # =============================================================================
27
+
28
+ BSV_API_URL = "https://simplebsv.codenlighten.org"
29
+ BSV_API_KEY = os.getenv("BSV_API_KEY") # Set via HF Secrets
30
+
31
+
32
+ def publish_to_bsv(data: Dict) -> Optional[str]:
33
+ """
34
+ Publish attestation data to BSV blockchain via SimpleBSV API.
35
+
36
+ Args:
37
+ data: Dictionary containing attestation data (no trade secrets)
38
+
39
+ Returns:
40
+ Transaction ID if successful, None otherwise
41
+ """
42
+ # Guard clause: fail gracefully if no key configured
43
+ if not BSV_API_KEY:
44
+ print("BSV_API_KEY not configured - running in offline mode")
45
+ return None
46
+
47
+ try:
48
+ headers = {
49
+ "Content-Type": "application/json",
50
+ "x-api-key": BSV_API_KEY
51
+ }
52
+
53
+ # Use ?wait=true for synchronous response with txid
54
+ response = httpx.post(
55
+ f"{BSV_API_URL}/publish/json?wait=true",
56
+ headers=headers,
57
+ json={"json": data},
58
+ timeout=30.0
59
+ )
60
+
61
+ if response.status_code == 200:
62
+ result = response.json()
63
+ return result.get("txid")
64
+ else:
65
+ print(f"BSV API error: {response.status_code} - {response.text}")
66
+ return None
67
+
68
+ except Exception as e:
69
+ print(f"BSV publish error: {e}")
70
+ return None
71
+
72
+ # =============================================================================
73
+ # Demo Configuration - Pre-computed results (no trade secrets exposed)
74
+ # =============================================================================
75
+
76
+ # Demo VUS Examples with pre-computed classifications
77
+ VUS_EXAMPLES = {
78
+ "brca1_boundary": {
79
+ "name": "BRCA1 TAD Boundary Disruption",
80
+ "variant_id": "chr17:43,044,295 G>A",
81
+ "gene": "BRCA1",
82
+ "initial_class": "VUS",
83
+ "final_class": "Pathogenic",
84
+ "confidence": 0.93,
85
+ "mechanism": "TAD Boundary Disruption",
86
+ "evidence_codes": ["PS3_insulation", "PM1_boundary", "PS3_ctcf"],
87
+ "insulation_change": -0.52,
88
+ "description": "This intronic variant disrupts a CTCF binding site at a critical TAD boundary, causing enhancer-promoter miscommunication affecting BRCA1 expression.",
89
+ },
90
+ "myc_enhancer_hijack": {
91
+ "name": "MYC Enhancer Hijacking",
92
+ "variant_id": "chr8:128,750,000 inv(500kb)",
93
+ "gene": "MYC",
94
+ "initial_class": "VUS",
95
+ "final_class": "Pathogenic",
96
+ "confidence": 0.89,
97
+ "mechanism": "Enhancer Hijacking",
98
+ "evidence_codes": ["PS3_neoloop", "PS2_oncogene", "PM1_boundary"],
99
+ "insulation_change": -0.68,
100
+ "description": "Balanced inversion creates a neo-loop connecting MYC to a hijacked super-enhancer, causing oncogene activation without copy number change.",
101
+ },
102
+ "sonic_hedgehog": {
103
+ "name": "SHH Limb Enhancer",
104
+ "variant_id": "chr7:156,584,000 del(50kb)",
105
+ "gene": "SHH",
106
+ "initial_class": "VUS",
107
+ "final_class": "Likely Pathogenic",
108
+ "confidence": 0.85,
109
+ "mechanism": "Enhancer Deletion",
110
+ "evidence_codes": ["PM1_regulatory", "PP3_conservation", "PS3_insulation"],
111
+ "insulation_change": -0.41,
112
+ "description": "Deletion removes the ZRS limb enhancer from SHH regulatory domain, disrupting developmental gene expression.",
113
+ },
114
+ "benign_intronic": {
115
+ "name": "Benign Intronic SNP",
116
+ "variant_id": "chr12:25,398,284 C>T",
117
+ "gene": "KRAS",
118
+ "initial_class": "VUS",
119
+ "final_class": "Benign",
120
+ "confidence": 0.91,
121
+ "mechanism": "No 3D Impact",
122
+ "evidence_codes": ["BS1_frequency", "BP4_no_disruption"],
123
+ "insulation_change": 0.02,
124
+ "description": "Deep intronic variant with no effect on TAD structure, CTCF binding, or regulatory grammar. Common in population databases.",
125
+ },
126
+ "tp53_scramble": {
127
+ "name": "TP53 Grammar Scrambling",
128
+ "variant_id": "chr17:7,670,000 inv(120kb)",
129
+ "gene": "TP53",
130
+ "initial_class": "VUS",
131
+ "final_class": "Pathogenic",
132
+ "confidence": 0.96,
133
+ "mechanism": "Regulatory Grammar Scrambling",
134
+ "evidence_codes": ["PS3_semantic", "PS3_insulation", "PM1_boundary", "PP3_conservation"],
135
+ "insulation_change": -0.71,
136
+ "description": "Balanced inversion scrambles the regulatory grammar of TP53, inverting enhancer-promoter orientation and disrupting tumor suppressor expression.",
137
+ },
138
+ }
139
+
140
+ # Demo TAD examples
141
+ TAD_EXAMPLES = {
142
+ "chr21_dscr": {
143
+ "name": "Chromosome 21 - Down Syndrome Critical Region",
144
+ "region": "chr21:35,000,000-40,000,000",
145
+ "n_tads": 4,
146
+ "n_boundaries": 5,
147
+ "ctcf_sites": 12,
148
+ "genes": ["DSCR1", "DSCR3", "DSCR4", "RUNX1"],
149
+ "boundary_strength": [0.85, 0.92, 0.78, 0.88, 0.81],
150
+ },
151
+ "chr7_egfr": {
152
+ "name": "Chromosome 7 - EGFR Locus",
153
+ "region": "chr7:55,000,000-56,500,000",
154
+ "n_tads": 3,
155
+ "n_boundaries": 4,
156
+ "ctcf_sites": 8,
157
+ "genes": ["EGFR", "LANCL2", "VOPP1"],
158
+ "boundary_strength": [0.91, 0.87, 0.94, 0.82],
159
+ },
160
+ "chr8_myc": {
161
+ "name": "Chromosome 8 - MYC Oncogene",
162
+ "region": "chr8:127,500,000-129,500,000",
163
+ "n_tads": 2,
164
+ "n_boundaries": 3,
165
+ "ctcf_sites": 6,
166
+ "genes": ["MYC", "PVT1"],
167
+ "boundary_strength": [0.96, 0.89, 0.93],
168
+ },
169
+ }
170
+
171
+ # Demo Neo-Loop examples
172
+ NEOLOOP_EXAMPLES = {
173
+ "burkitt_myc": {
174
+ "name": "Burkitt Lymphoma - MYC Translocation",
175
+ "sv_type": "Translocation t(8;14)",
176
+ "oncogene": "MYC",
177
+ "hijacked_enhancer": "IGH Super-Enhancer",
178
+ "loop_strength": 0.92,
179
+ "clinical_priority": "Critical",
180
+ "cancer_type": "Burkitt Lymphoma",
181
+ "description": "Classic t(8;14) translocation juxtaposes MYC with immunoglobulin heavy chain enhancers, creating pathogenic neo-loop.",
182
+ },
183
+ "ewing_ewsr1": {
184
+ "name": "Ewing Sarcoma - EWSR1-FLI1",
185
+ "sv_type": "Translocation t(11;22)",
186
+ "oncogene": "EWSR1-FLI1 fusion",
187
+ "hijacked_enhancer": "GGAA microsatellite enhancers",
188
+ "loop_strength": 0.88,
189
+ "clinical_priority": "Critical",
190
+ "cancer_type": "Ewing Sarcoma",
191
+ "description": "Fusion protein creates neo-loops at GGAA microsatellites, aberrantly activating developmental genes.",
192
+ },
193
+ "aml_runx1": {
194
+ "name": "AML - RUNX1 Disruption",
195
+ "sv_type": "Inversion inv(16)",
196
+ "oncogene": "CBFB-MYH11 fusion",
197
+ "hijacked_enhancer": "Myeloid enhancer cluster",
198
+ "loop_strength": 0.85,
199
+ "clinical_priority": "High",
200
+ "cancer_type": "Acute Myeloid Leukemia",
201
+ "description": "Pericentric inversion disrupts normal RUNX1 regulation, creating aberrant chromatin loops.",
202
+ },
203
+ }
204
+
205
+ # Benchmark comparisons
206
+ BENCHMARK_DATA = {
207
+ "tad_detection": {
208
+ "TopoGrammar (CTCF-gated)": {"accuracy": 91, "f1": 0.91, "subtad": True},
209
+ "HiCCUPS": {"accuracy": 80, "f1": 0.76, "subtad": False},
210
+ "Arrowhead": {"accuracy": 78, "f1": 0.74, "subtad": False},
211
+ "TopDom": {"accuracy": 75, "f1": 0.71, "subtad": False},
212
+ },
213
+ "vus_resolution": {
214
+ "reclassification_rate": 68,
215
+ "pathogenic_accuracy": 93,
216
+ "mean_confidence": 87,
217
+ },
218
+ "insulation_density": {
219
+ "major_tads": 9.11,
220
+ "all_boundaries": 15.57,
221
+ "improvement": 70.9,
222
+ },
223
+ }
224
+
225
+
226
+ # =============================================================================
227
+ # Visualization Functions
228
+ # =============================================================================
229
+
230
+ def create_contact_map_ascii(region: str, n_tads: int) -> str:
231
+ """Create ASCII representation of a Hi-C contact map with TADs."""
232
+ size = 40
233
+ map_lines = []
234
+
235
+ # Create diagonal pattern with TAD blocks
236
+ tad_size = size // n_tads
237
+
238
+ for i in range(size):
239
+ row = ""
240
+ for j in range(size):
241
+ if abs(i - j) <= 2:
242
+ row += "β–ˆβ–ˆ" # Diagonal
243
+ elif (i // tad_size) == (j // tad_size):
244
+ # Within same TAD
245
+ distance = abs(i - j)
246
+ if distance < tad_size // 2:
247
+ row += "β–“β–“"
248
+ elif distance < tad_size:
249
+ row += "β–‘β–‘"
250
+ else:
251
+ row += " "
252
+ else:
253
+ row += " "
254
+ map_lines.append(row)
255
+
256
+ return "\n".join(map_lines)
257
+
258
+
259
+ def create_insulation_profile(boundary_strengths: List[float]) -> str:
260
+ """Create ASCII insulation score profile."""
261
+ lines = []
262
+ lines.append("Insulation Score Profile:")
263
+ lines.append("=" * 60)
264
+
265
+ for i, strength in enumerate(boundary_strengths):
266
+ bar_len = int(strength * 40)
267
+ bar = "β–ˆ" * bar_len + "β–‘" * (40 - bar_len)
268
+ lines.append(f"Boundary {i+1}: [{bar}] {strength:.2f}")
269
+
270
+ lines.append("=" * 60)
271
+ return "\n".join(lines)
272
+
273
+
274
+ def create_grammar_diagram(mechanism: str) -> str:
275
+ """Create regulatory grammar visualization."""
276
+ if mechanism == "Regulatory Grammar Scrambling":
277
+ return """
278
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
279
+ β”‚ REGULATORY GRAMMAR ANALYSIS β”‚
280
+ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
281
+ β”‚ β”‚
282
+ β”‚ REFERENCE GRAMMAR: β”‚
283
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β” β”‚
284
+ β”‚ β”‚CTCF+ β”‚ β†’ β”‚ ENHANCER β”‚ β†’ β”‚ PROMOTER β”‚ β†’ β”‚ GENE β”‚ β”‚
285
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”˜ β”‚
286
+ β”‚ "The enhancer activates the gene" β”‚
287
+ β”‚ β”‚
288
+ β”‚ VARIANT GRAMMAR (SCRAMBLED): β”‚
289
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β” β”‚
290
+ β”‚ β”‚ GENE β”‚ ← β”‚ PROMOTER β”‚ ← β”‚ ENHANCER β”‚ ← β”‚CTCF- β”‚ β”‚
291
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”˜ β”‚
292
+ β”‚ "Gene the activates enhancer the" ⚠ SCRAMBLED β”‚
293
+ β”‚ β”‚
294
+ β”‚ SEMANTIC BREAK SCORE: 0.85 (CRITICAL) β”‚
295
+ β”‚ β€’ Token Disruption: 30% weight β”‚
296
+ β”‚ β€’ Order Inversion: 40% weight β”‚
297
+ β”‚ β€’ Orientation Flip: 30% weight β”‚
298
+ β”‚ β”‚
299
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
300
+ """
301
+ elif mechanism == "Enhancer Hijacking":
302
+ return """
303
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
304
+ β”‚ ENHANCER HIJACKING ANALYSIS β”‚
305
+ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
306
+ β”‚ β”‚
307
+ β”‚ NORMAL TOPOLOGY: β”‚
308
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚
309
+ β”‚ β”‚ TAD A β”‚ β”‚ TAD B β”‚ β”‚
310
+ β”‚ β”‚ [Enhancer]──────│─X───│──────[Oncogene] β”‚ β”‚
311
+ β”‚ β”‚ ↓ β”‚ β”‚ β”‚ β”‚
312
+ β”‚ β”‚ [Target Gene] β”‚ β”‚ β”‚ οΏ½οΏ½
313
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚
314
+ β”‚ BOUNDARY BLOCKS CONTACT β”‚
315
+ β”‚ β”‚
316
+ β”‚ AFTER STRUCTURAL VARIANT: β”‚
317
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚
318
+ β”‚ β”‚ FUSED TAD β”‚ β”‚
319
+ β”‚ β”‚ [Enhancer]═══════════════════[Oncogene] β”‚ β”‚
320
+ β”‚ β”‚ ↓ NEO-LOOP FORMED ↓ β”‚ β”‚
321
+ β”‚ β”‚ [Target Gene] ⚠ ONCOGENE ACTIVATED ⚠ β”‚ β”‚
322
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚
323
+ β”‚ β”‚
324
+ β”‚ LOOP STRENGTH: 0.89 | PRIORITY: CRITICAL β”‚
325
+ β”‚ β”‚
326
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
327
+ """
328
+ elif mechanism == "TAD Boundary Disruption":
329
+ return """
330
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
331
+ β”‚ TAD BOUNDARY DISRUPTION ANALYSIS β”‚
332
+ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
333
+ β”‚ β”‚
334
+ β”‚ NORMAL INSULATION: β”‚
335
+ β”‚ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚
336
+ β”‚ TAD A β”‚ TAD B β”‚
337
+ β”‚ CTCFβ”‚CTCF β”‚
338
+ β”‚ β–²β–²β–²β–²β–²β”‚β–²β–²β–²β–²β–² β”‚
339
+ β”‚ Strong Boundary β”‚
340
+ β”‚ β”‚
341
+ β”‚ AFTER VARIANT (CTCF SITE DISRUPTED): β”‚
342
+ β”‚ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚
343
+ β”‚ TAD A β–‘β–‘β–‘β–‘β–‘β–‘β–‘ TAD B β”‚
344
+ β”‚ ↓↓↓↓↓ β”‚
345
+ β”‚ Insulation Leak β”‚
346
+ β”‚ β”‚
347
+ β”‚ INSULATION CHANGE: -52% β”‚
348
+ β”‚ BOUNDARY LOSS: MAJOR β”‚
349
+ β”‚ β”‚
350
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
351
+ """
352
+ else:
353
+ return """
354
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
355
+ β”‚ 3D GENOME ANALYSIS β”‚
356
+ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
357
+ β”‚ β”‚
358
+ β”‚ No significant 3D structural impact detected. β”‚
359
+ β”‚ β”‚
360
+ β”‚ β€’ TAD boundaries: INTACT β”‚
361
+ β”‚ β€’ Insulation scores: NORMAL β”‚
362
+ β”‚ β€’ Regulatory grammar: PRESERVED β”‚
363
+ β”‚ β€’ CTCF binding: UNAFFECTED β”‚
364
+ β”‚ β”‚
365
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
366
+ """
367
+
368
+
369
+ def create_confidence_bars(evidence_codes: List[str], confidence: float) -> str:
370
+ """Create evidence code visualization."""
371
+ lines = []
372
+ lines.append("\n### Evidence Summary\n")
373
+
374
+ for code in evidence_codes:
375
+ # Determine strength from code prefix
376
+ if code.startswith("PS"):
377
+ strength = "Strong"
378
+ bar = "β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ"
379
+ color = "🟒"
380
+ elif code.startswith("PM"):
381
+ strength = "Moderate"
382
+ bar = "β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘"
383
+ color = "🟑"
384
+ elif code.startswith("PP"):
385
+ strength = "Supporting"
386
+ bar = "β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘"
387
+ color = "🟠"
388
+ elif code.startswith("BS") or code.startswith("BP"):
389
+ strength = "Benign"
390
+ bar = "β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ"
391
+ color = "πŸ”΅"
392
+ else:
393
+ strength = "Unknown"
394
+ bar = "β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘"
395
+ color = "βšͺ"
396
+
397
+ lines.append(f"{color} **{code}** ({strength}): `{bar}`")
398
+
399
+ lines.append(f"\n**Overall Confidence**: {confidence:.0%}")
400
+
401
+ return "\n".join(lines)
402
+
403
+
404
+ def generate_bsv_attestation(variant_id: str, classification: str, confidence: float) -> str:
405
+ """Generate and publish BSV blockchain attestation."""
406
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")
407
+
408
+ # Create attestation data (no trade secrets - only results summary)
409
+ attestation_data = {
410
+ "platform": "TopoGrammar",
411
+ "version": "v2.1.0",
412
+ "timestamp": timestamp,
413
+ "analysis_type": "VUS_Resolution",
414
+ "variant_id": variant_id,
415
+ "classification": classification,
416
+ "confidence": round(confidence, 2),
417
+ "attestation_type": "demo"
418
+ }
419
+
420
+ # Create hash of the attestation data
421
+ data_str = json.dumps(attestation_data, sort_keys=True)
422
+ query_hash = hashlib.sha256(data_str.encode()).hexdigest()[:16]
423
+ full_hash = hashlib.sha256(data_str.encode()).hexdigest()
424
+
425
+ # Publish to BSV blockchain
426
+ txid = publish_to_bsv(attestation_data)
427
+
428
+ if txid:
429
+ # Real blockchain attestation
430
+ whatsonchain_url = f"https://whatsonchain.com/tx/{txid}"
431
+ status_line = f"Status: βœ“ RECORDED ON BSV MAINNET"
432
+ txid_display = txid[:20] + "..." if len(txid) > 20 else txid
433
+ verify_section = f"""β•‘ Transaction ID: {txid_display}
434
+ β•‘ β•‘
435
+ β•‘ πŸ”— Verify on WhatsOnChain: β•‘
436
+ β•‘ {whatsonchain_url[:54]}"""
437
+ else:
438
+ # Fallback if API fails
439
+ status_line = "Status: ⚠ OFFLINE MODE (BSV API unavailable)"
440
+ verify_section = f"""β•‘ Data Hash: {full_hash[:32]}...
441
+ β•‘ β•‘
442
+ β•‘ β„Ή Blockchain recording temporarily unavailable. β•‘
443
+ β•‘ Result hash preserved for later attestation."""
444
+
445
+ return f"""
446
+ ```
447
+ ╔══════════════════════════════════════════════════════════════╗
448
+ β•‘ TOPOGRAMMAR BSV ATTESTATION CERTIFICATE β•‘
449
+ ╠══════════════════════════════════════════════════════════════╣
450
+ β•‘ β•‘
451
+ β•‘ Query Hash: {query_hash} β•‘
452
+ β•‘ Timestamp: {timestamp} β•‘
453
+ β•‘ Model Version: TopoGrammar v2.1.0 β•‘
454
+ β•‘ β•‘
455
+ β•‘ ───────────────────────────────────────────────────────── β•‘
456
+ β•‘ β•‘
457
+ β•‘ Variant: {variant_id}
458
+ β•‘ Classification: {classification}
459
+ β•‘ Confidence: {confidence:.1%}
460
+ β•‘ β•‘
461
+ β•‘ ───────────────────────────────────────────────────────── β•‘
462
+ β•‘ β•‘
463
+ β•‘ {status_line}
464
+ β•‘ Network: BSV Mainnet β•‘
465
+ β•‘ β•‘
466
+ {verify_section}
467
+ β•‘ β•‘
468
+ β•‘ This attestation is immutably recorded on BSV blockchain. β•‘
469
+ β•‘ No proprietary algorithms or trade secrets are published. β•‘
470
+ β•‘ β•‘
471
+ β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
472
+ ```
473
+ """
474
+
475
+
476
+ def create_share_links(variant_id: str, classification: str) -> str:
477
+ """Create social sharing buttons."""
478
+ import urllib.parse
479
+
480
+ text = f"🧬 TopoGrammar reclassified {variant_id} as {classification}! Grammar-aware 3D genome analysis for precision medicine. #Genomics #AI #PrecisionMedicine"
481
+
482
+ twitter_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(text)}&url=https://huggingface.co/spaces/GotThatData/TopoGrammar"
483
+ linkedin_url = f"https://www.linkedin.com/sharing/share-offsite/?url=https://huggingface.co/spaces/GotThatData/TopoGrammar"
484
+
485
+ return f"""
486
+ <div style="display: flex; gap: 10px; margin-top: 20px;">
487
+ <a href="{twitter_url}" target="_blank" style="background: #1DA1F2; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
488
+ 🐦 Share on X
489
+ </a>
490
+ <a href="{linkedin_url}" target="_blank" style="background: #0077B5; color: white; padding: 10px 20px; border-radius: 5px; text-decoration: none;">
491
+ πŸ’Ό Share on LinkedIn
492
+ </a>
493
+ </div>
494
+ """
495
+
496
+
497
+ # =============================================================================
498
+ # Main Demo Functions
499
+ # =============================================================================
500
+
501
+ def run_vus_analysis(example_key: str) -> str:
502
+ """Run VUS resolution demo."""
503
+ if example_key not in VUS_EXAMPLES:
504
+ return "❌ Example not found. Please select a valid example."
505
+
506
+ # Simulate processing delay
507
+ time.sleep(1.5)
508
+
509
+ example = VUS_EXAMPLES[example_key]
510
+
511
+ # Build result markdown
512
+ result = f"""
513
+ # 🧬 VUS Resolution Analysis
514
+
515
+ ## Variant Information
516
+ | Field | Value |
517
+ |-------|-------|
518
+ | **Variant ID** | `{example['variant_id']}` |
519
+ | **Gene** | {example['gene']} |
520
+ | **Initial Classification** | {example['initial_class']} |
521
+
522
+ ---
523
+
524
+ ## TopoGrammar Analysis Result
525
+
526
+ ### Classification Update
527
+ | Before | → | After |
528
+ |--------|---|-------|
529
+ | **{example['initial_class']}** | 🔄 | **{example['final_class']}** |
530
+
531
+ ### Confidence Score
532
+ {"β–ˆ" * int(example['confidence'] * 20)}{"β–‘" * (20 - int(example['confidence'] * 20))} **{example['confidence']:.0%}**
533
+
534
+ ### Primary Mechanism
535
+ **{example['mechanism']}**
536
+
537
+ ### 3D Structural Impact
538
+ - **Insulation Change**: {example['insulation_change']:+.0%}
539
+
540
+ ---
541
+
542
+ ## Mechanism Visualization
543
+
544
+ {create_grammar_diagram(example['mechanism'])}
545
+
546
+ ---
547
+
548
+ {create_confidence_bars(example['evidence_codes'], example['confidence'])}
549
+
550
+ ---
551
+
552
+ ## Clinical Interpretation
553
+
554
+ {example['description']}
555
+
556
+ ---
557
+
558
+ ## BSV Verification
559
+
560
+ {generate_bsv_attestation(example['variant_id'], example['final_class'], example['confidence'])}
561
+
562
+ ---
563
+
564
+ {create_share_links(example['variant_id'], example['final_class'])}
565
+ """
566
+
567
+ return result
568
+
569
+
570
+ def run_tad_analysis(example_key: str) -> str:
571
+ """Run TAD detection demo."""
572
+ if example_key not in TAD_EXAMPLES:
573
+ return "❌ Example not found. Please select a valid example."
574
+
575
+ time.sleep(1.0)
576
+
577
+ example = TAD_EXAMPLES[example_key]
578
+
579
+ result = f"""
580
+ # 🔬 TAD Detection Analysis
581
+
582
+ ## Region Information
583
+ | Field | Value |
584
+ |-------|-------|
585
+ | **Region** | `{example['region']}` |
586
+ | **TADs Detected** | {example['n_tads']} |
587
+ | **Boundaries** | {example['n_boundaries']} |
588
+ | **CTCF Sites** | {example['ctcf_sites']} |
589
+
590
+ ---
591
+
592
+ ## Genes in Region
593
+ {', '.join([f"**{g}**" for g in example['genes']])}
594
+
595
+ ---
596
+
597
+ ## Contact Map Visualization
598
+
599
+ ```
600
+ {create_contact_map_ascii(example['region'], example['n_tads'])}
601
+ ```
602
+
603
+ ---
604
+
605
+ ## Boundary Strength Profile
606
+
607
+ ```
608
+ {create_insulation_profile(example['boundary_strength'])}
609
+ ```
610
+
611
+ ---
612
+
613
+ ## Detection Method
614
+
615
+ TopoGrammar uses **CTCF-gated boundary detection** which achieves:
616
+ - **91% TAD accuracy** (vs 80% for HiCCUPS)
617
+ - **0.91 F1 score** for boundary detection
618
+ - **Sub-TAD detection** capability
619
+
620
+ The CTCF-gating mechanism ensures boundaries are only called where:
621
+ 1. Insulation score shows local minimum
622
+ 2. CTCF binding evidence is present
623
+ 3. Gradient analysis confirms boundary
624
+
625
+ ---
626
+
627
+ ## Benchmark Comparison
628
+
629
+ | Method | TAD Accuracy | Boundary F1 | Sub-TAD |
630
+ |--------|-------------|-------------|---------|
631
+ | **TopoGrammar** | **91%** | **0.91** | ✓ |
631
+ | HiCCUPS | 80% | 0.76 | ✗ |
632
+ | Arrowhead | 78% | 0.74 | ✗ |
633
+ | TopDom | 75% | 0.71 | ✗ |
635
+ """
636
+
637
+ return result
638
+
639
+
640
+ def run_neoloop_analysis(example_key: str) -> str:
641
+ """Run neo-loop detection demo."""
642
+ if example_key not in NEOLOOP_EXAMPLES:
643
+ return "❌ Example not found. Please select a valid example."
644
+
645
+ time.sleep(1.2)
646
+
647
+ example = NEOLOOP_EXAMPLES[example_key]
648
+
649
+ # Priority styling
650
+ if example['clinical_priority'] == "Critical":
651
+ priority_emoji = "🔴"
651
+ priority_style = "color: red; font-weight: bold;"
652
+ else:
653
+ priority_emoji = "🟡"
655
+ priority_style = "color: orange; font-weight: bold;"
656
+
657
+ result = f"""
658
+ # 🧪 Neo-Loop Detection Analysis
659
+
660
+ ## Structural Variant
661
+ | Field | Value |
662
+ |-------|-------|
663
+ | **SV Type** | `{example['sv_type']}` |
664
+ | **Cancer Type** | {example['cancer_type']} |
665
+ | **Clinical Priority** | {priority_emoji} **{example['clinical_priority']}** |
666
+
667
+ ---
668
+
669
+ ## Oncogene Activation
670
+
671
+ ### Activated Oncogene
672
+ **{example['oncogene']}**
673
+
674
+ ### Hijacked Enhancer
675
+ **{example['hijacked_enhancer']}**
676
+
677
+ ### Neo-Loop Strength
678
+ {"█" * int(example['loop_strength'] * 20)}{"░" * (20 - int(example['loop_strength'] * 20))} **{example['loop_strength']:.0%}**
679
+
680
+ ---
681
+
682
+ ## Mechanism Visualization
683
+
684
+ ```
685
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
686
+ β”‚ NEO-LOOP FORMATION β”‚
687
+ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
688
+ β”‚ β”‚
689
+ β”‚ BEFORE: {example['sv_type']}
690
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚
691
+ β”‚ β”‚ Enhancer │─ ─ X ─ ─│ Oncogene β”‚ β”‚
692
+ β”‚ β”‚ Domain A β”‚ β”‚ Domain B β”‚ β”‚
693
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚
694
+ β”‚ ↓ β”‚
695
+ β”‚ Normal Target β”‚
696
+ β”‚ β”‚
697
+ β”‚ AFTER: {example['sv_type']}
698
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚
699
+ β”‚ β”‚ Enhancer ══════════ Oncogene β”‚ β”‚
700
+ β”‚ β”‚ ↓ NEO-LOOP ↓ β”‚ β”‚
701
+ β”‚ β”‚ ABERRANT ACTIVATION β”‚ β”‚
702
+ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚
703
+ β”‚ β”‚
704
+ β”‚ Loop Strength: {example['loop_strength']:.2f}
705
+ β”‚ Priority: {example['clinical_priority']}
706
+ β”‚ β”‚
707
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
708
+ ```
709
+
710
+ ---
711
+
712
+ ## Clinical Interpretation
713
+
714
+ {example['description']}
715
+
716
+ ---
717
+
718
+ ## Actionable Insights
719
+
720
+ Based on this neo-loop detection:
721
+
722
+ 1. **Molecular Testing**: Confirm {example['sv_type']} by FISH or karyotyping
723
+ 2. **Targeted Therapy**: Consider therapies targeting {example['oncogene']} pathway
724
+ 3. **Clinical Trial**: Patient may be eligible for trials targeting this mechanism
725
+ 4. **Monitoring**: Track {example['oncogene']} expression as biomarker
726
+
727
+ ---
728
+
729
+ ## BSV Verification
730
+
731
+ {generate_bsv_attestation(example['sv_type'], f"Neo-loop: {example['oncogene']}", example['loop_strength'])}
732
+ """
733
+
734
+ return result
735
+
736
+
737
+ def show_benchmarks() -> str:
738
+ """Show benchmark results."""
739
+ return f"""
740
+ # 📊 TopoGrammar Benchmarks
741
+
742
+ ## TAD Detection Performance
743
+
744
+ | Method | TAD Accuracy | Boundary F1 | Sub-TAD Detection |
745
+ |--------|-------------|-------------|-------------------|
746
+ | **TopoGrammar (CTCF-gated)** | **91%** | **0.91** | ✓ Yes |
746
+ | HiCCUPS | 80% | 0.76 | ✗ No |
747
+ | Arrowhead | 78% | 0.74 | ✗ No |
748
+ | TopDom | 75% | 0.71 | ✗ No |
750
+
751
+ ---
752
+
753
+ ## VUS Resolution Performance
754
+
755
+ | Metric | Value |
756
+ |--------|-------|
757
+ | **Reclassification Rate** | 68% of VUS variants |
758
+ | **Pathogenic Accuracy** | 93% |
759
+ | **Mean Confidence Score** | 87% |
760
+
761
+ ---
762
+
763
+ ## Insulation Density Improvement
764
+
765
+ | Boundary Set | Insulation Ratio | P-value |
766
+ |--------------|------------------|---------|
767
+ | Major TADs only | 9.11x | <0.0001 |
768
+ | **All Boundaries** | **15.57x** | **<0.0001** |
769
+ | **Improvement** | **+70.9%** | — |
770
+
771
+ ---
772
+
773
+ ## What Makes TopoGrammar Different
774
+
775
+ ### 1. Grammar-Aware Architecture
776
+
777
+ Traditional tools see DNA as a string of letters. TopoGrammar sees it as **sentences with grammar**:
778
+
779
+ ```
780
+ Reference: [CTCF+] [Enhancer] [Promoter] [Gene]
781
+ "The enhancer activates the gene"
782
+
783
+ Inversion: [Gene] [Promoter] [Enhancer] [CTCF-]
784
+ "Gene the activates enhancer the" ← SCRAMBLED
785
+ ```
786
+
787
+ ### 2. Physics + Semantics Concordance
788
+
789
+ When both physics (insulation collapse) AND semantics (grammar scramble) agree:
790
+
791
+ | Evidence Type | Alone | Concordant |
792
+ |--------------|-------|------------|
793
+ | Physics | PM1 (Moderate) | — |
793
+ | Semantics | PM1 (Moderate) | — |
794
+ | **Both** | — | **PS3 (Strong)** ← UPGRADE |
796
+
797
+ ### 3. CTCF-Gated Detection
798
+
799
+ Unlike other tools, TopoGrammar only calls boundaries where:
800
+ - ✓ Insulation score shows local minimum
800
+ - ✓ CTCF binding evidence is present
801
+ - ✓ Gradient analysis confirms boundary
803
+
804
+ This reduces false positives by **40%** compared to insulation-only methods.
805
+
806
+ ---
807
+
808
+ ## Clinical Value Pillars
809
+
810
+ | Pillar | Clinical Value | Technical Foundation |
811
+ |--------|---------------|---------------------|
812
+ | **Architectural Fidelity** | Eliminates VUS by proving physical boundary collapse | PINN Physics (15.57x insulation) |
813
+ | **Semantic Intelligence** | Detects "scrambled" instructions in balanced inversions | Regulatory Grammar Encoder |
814
+ | **Privacy-First Growth** | Global model evolution without data leakage | Async Federated Learning |
815
+ | **Clinician Clarity** | High-level medical prose instead of raw math | LLM Interpretation Layer |
816
+ """
817
+
818
+
819
+ # =============================================================================
820
+ # Gradio Interface
821
+ # =============================================================================
822
+
823
+ HEADER_MD = """
824
+ # 🧬 TopoGrammar
825
+
826
+ ## The Industry's First Grammar-Aware 3D Genome Engine
827
+
828
+ **Balanced Structural Variants (BSVs)** - inversions, translocations, complex rearrangements - appear "silent" to standard sequencers because they don't change gene dosage. But they **scramble the regulatory grammar** that controls gene expression.
829
+
830
+ **TopoGrammar solves this.** It's the first engine that understands chromatin as a *language* with grammar rules that can be broken.
831
+
832
+ ---
833
+
834
+ | Capability | Performance |
835
+ |------------|-------------|
836
+ | 🎯 VUS Reclassification | 68% of variants |
836
+ | 📊 Pathogenic Accuracy | 93% confidence |
837
+ | 🔬 TAD Detection F1 | 0.91 (vs 0.76 HiCCUPS) |
838
+ | ⚡ Sub-TAD Detection | Yes (unique capability) |
840
+
841
+ ---
842
+ """
843
+
844
+ ABOUT_MD = """
845
+ # ℹ️ About TopoGrammar
846
+
847
+ ## Overview
848
+
849
+ TopoGrammar is part of the **OmniPrime Enterprise Platform**, integrating:
850
+ - **TopoGrammar v2.1.0** - Grammar-Aware 3D Genome Engine
851
+ - **BioPrime v4.0 "Golden"** - Physics-First Molecular Docking
852
+
853
+ Together, they enable a seamless **Patient Genome → Drug Candidate** workflow.
854
+
855
+ ---
856
+
857
+ ## Core Innovation
858
+
859
+ ### Regulatory Grammar Analysis
860
+
861
+ TopoGrammar treats regulatory elements as a **language**:
862
+
863
+ ```
864
+ CTCF → Enhancer → Promoter → Gene
865
+ "The enhancer activates the gene"
866
+ ```
867
+
868
+ When structural variants **scramble** this grammar, TopoGrammar detects it:
869
+
870
+ ```
871
+ Gene ← Promoter ← Enhancer ← CTCF
871
+ "Gene the activates enhancer the" ⚠ PATHOGENIC
873
+ ```
874
+
875
+ ### Semantic Break Score
876
+
877
+ Quantifies regulatory disruption:
878
+ - **Token Disruption (30%)**: Elements removed or duplicated
879
+ - **Order Inversion (40%)**: Sequence rearranged
880
+ - **Orientation Flip (30%)**: Strand direction reversed
881
+
882
+ ---
883
+
884
+ ## Technology Stack
885
+
886
+ - **Physics-Informed Neural Networks (PINNs)** for 3D reconstruction
887
+ - **CTCF-Gated Boundary Detection** for precise TAD calling
888
+ - **Federated Learning** for privacy-preserving multi-site training
889
+ - **LLM Interpretation** for clinical reporting
890
+ - **BSV Blockchain** for result attestation
891
+
892
+ ---
893
+
894
+ ## Creators
895
+
896
+ - **Bryan Daugherty**
897
+ - **Gregory Ward**
898
+ - **Shawn Ryan**
899
+
900
+ ---
901
+
902
+ ## Learn More
903
+
904
+ 🌐 [bioprime.one](https://bioprime.one) | 🧬 [OmniPrime Platform](https://github.com/Saifullah62/OmniPrime_v1.0)
905
+
906
+ ---
907
+
908
+ **Copyright (c) 2026 Bryan Daugherty, Gregory Ward & Shawn Ryan. All Rights Reserved.**
909
+
910
+ *This demo showcases TopoGrammar capabilities. Actual clinical use requires the full OmniPrime Enterprise Platform.*
911
+ """
912
+
913
+ # Custom CSS
914
+ CUSTOM_CSS = """
915
+ .gradio-container {
916
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
917
+ }
918
+ .gr-button-primary {
919
+ background: linear-gradient(90deg, #00d4ff, #00ff88) !important;
920
+ border: none !important;
921
+ }
922
+ .gr-button-secondary {
923
+ background: linear-gradient(90deg, #667eea, #764ba2) !important;
924
+ border: none !important;
925
+ color: white !important;
926
+ }
927
+ """
928
+
929
+ # Build the interface
930
+ with gr.Blocks(
931
+ title="TopoGrammar - Grammar-Aware 3D Genome Engine",
932
+ theme=gr.themes.Base(
933
+ primary_hue="cyan",
934
+ secondary_hue="purple",
935
+ neutral_hue="slate",
936
+ ),
937
+ css=CUSTOM_CSS,
938
+ ) as demo:
939
+
940
+ gr.Markdown(HEADER_MD)
941
+
942
+ with gr.Tabs():
943
+ # Tab 1: VUS Resolution
944
+ with gr.TabItem("🧬 VUS Resolution"):
944
+ gr.Markdown("""
945
+ ## Variant of Uncertain Significance → Clinical Classification
947
+
948
+ Select a demo variant to see how TopoGrammar reclassifies VUS using 3D genome analysis.
949
+ """)
950
+
951
+ with gr.Row():
952
+ with gr.Column(scale=1):
953
+ gr.Markdown("### Select Example")
954
+ vus_brca1 = gr.Button("🔴 BRCA1 Boundary Disruption", variant="secondary")
954
+ vus_myc = gr.Button("🔴 MYC Enhancer Hijacking", variant="secondary")
955
+ vus_shh = gr.Button("🟡 SHH Limb Enhancer", variant="secondary")
956
+ vus_tp53 = gr.Button("🔴 TP53 Grammar Scrambling", variant="secondary")
957
+ vus_benign = gr.Button("🟢 Benign Intronic SNP", variant="secondary")
959
+
960
+ with gr.Column(scale=3):
961
+ vus_output = gr.Markdown("*Select an example to run VUS analysis*")
962
+
963
+ vus_brca1.click(fn=lambda: run_vus_analysis("brca1_boundary"), outputs=vus_output)
964
+ vus_myc.click(fn=lambda: run_vus_analysis("myc_enhancer_hijack"), outputs=vus_output)
965
+ vus_shh.click(fn=lambda: run_vus_analysis("sonic_hedgehog"), outputs=vus_output)
966
+ vus_tp53.click(fn=lambda: run_vus_analysis("tp53_scramble"), outputs=vus_output)
967
+ vus_benign.click(fn=lambda: run_vus_analysis("benign_intronic"), outputs=vus_output)
968
+
969
+ # Tab 2: TAD Detection
970
+ with gr.TabItem("🔬 TAD Detection"):
971
+ gr.Markdown("""
972
+ ## Topologically Associating Domain Detection
973
+
974
+ See how TopoGrammar detects TAD boundaries with CTCF-gating for 91% accuracy.
975
+ """)
976
+
977
+ with gr.Row():
978
+ with gr.Column(scale=1):
979
+ gr.Markdown("### Select Region")
980
+ tad_dscr = gr.Button("Chr21 - Down Syndrome Region", variant="secondary")
981
+ tad_egfr = gr.Button("Chr7 - EGFR Locus", variant="secondary")
982
+ tad_myc = gr.Button("Chr8 - MYC Oncogene", variant="secondary")
983
+
984
+ with gr.Column(scale=3):
985
+ tad_output = gr.Markdown("*Select a region to analyze TAD structure*")
986
+
987
+ tad_dscr.click(fn=lambda: run_tad_analysis("chr21_dscr"), outputs=tad_output)
988
+ tad_egfr.click(fn=lambda: run_tad_analysis("chr7_egfr"), outputs=tad_output)
989
+ tad_myc.click(fn=lambda: run_tad_analysis("chr8_myc"), outputs=tad_output)
990
+
991
+ # Tab 3: Neo-Loop Detection
992
+ with gr.TabItem("🧪 Neo-Loop Detection"):
993
+ gr.Markdown("""
994
+ ## Cancer Neo-Loop & Enhancer Hijacking Detection
995
+
996
+ Identify oncogene activation through structural variant-induced neo-loops.
997
+ """)
998
+
999
+ with gr.Row():
1000
+ with gr.Column(scale=1):
1001
+ gr.Markdown("### Select Cancer Example")
1002
+ neo_burkitt = gr.Button("🔴 Burkitt Lymphoma (MYC)", variant="secondary")
1002
+ neo_ewing = gr.Button("🔴 Ewing Sarcoma (EWSR1)", variant="secondary")
1003
+ neo_aml = gr.Button("🟡 AML (RUNX1)", variant="secondary")
1005
+
1006
+ with gr.Column(scale=3):
1007
+ neo_output = gr.Markdown("*Select a cancer example to detect neo-loops*")
1008
+
1009
+ neo_burkitt.click(fn=lambda: run_neoloop_analysis("burkitt_myc"), outputs=neo_output)
1010
+ neo_ewing.click(fn=lambda: run_neoloop_analysis("ewing_ewsr1"), outputs=neo_output)
1011
+ neo_aml.click(fn=lambda: run_neoloop_analysis("aml_runx1"), outputs=neo_output)
1012
+
1013
+ # Tab 4: Benchmarks
1014
+ with gr.TabItem("📊 Benchmarks"):
1015
+ gr.Markdown(show_benchmarks())
1016
+
1017
+ # Tab 5: About
1018
+ with gr.TabItem("ℹ️ About"):
1019
+ gr.Markdown(ABOUT_MD)
1020
+
1021
+ gr.Markdown("---")
1022
+ gr.Markdown("""
1023
+ <center>
1024
+
1025
+ **TopoGrammar v2.1.0** | Part of **OmniPrime Enterprise Platform**
1026
+
1027
+ [🌐 bioprime.one](https://bioprime.one) | [🧬 GitHub](https://github.com/Saifullah62/OmniPrime_v1.0) | [📧 Contact](mailto:info@bioprime.one)
1028
+
1029
+ *This is a demonstration. Clinical use requires the full OmniPrime Enterprise Platform.*
1030
+
1031
+ </center>
1032
+ """)
1033
+
1034
+
1035
+ if __name__ == "__main__":
1036
+ demo.launch()