vigneshwar234 committed on
Commit
efe04fc
·
verified ·
1 Parent(s): 172ec45

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +656 -0
app.py ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VORTEXRAG Interactive Demo
3
+ Vector Orthogonal Resonance-Tuned EXtraction RAG
4
+ A 7-Layer Framework for Causal Retrieval-Augmented Generation
5
+
6
+ Author: Vignesh L | DOI: 10.5281/zenodo.20285144
7
+ GitHub: https://github.com/vignesh2027/VORTEXRAG
8
+ """
9
+
10
+ import gradio as gr
11
+ import math
12
+ import random
13
+ import json
14
+ import re
15
+ from typing import List, Dict, Tuple, Optional
16
+
17
+ # ─── Domain Presets ────────────────────────────────────────────────────────────
18
# Column order shared by every row of _PRESET_ROWS.
#   alpha / beta / gamma : weights of the semantic, syntactic and causal
#                          scores (also used as exponents by compute_phi)
#   tau                  : temperature passed to compute_sds
#   theta_cpg            : ESR threshold used by the layer-4 purge loop
#   delta_sdc            : minimum SDS a chunk needs to survive layer 3
#   delta_fv             : maximum delta_r accepted by layer 7
_PRESET_FIELDS = ("alpha", "beta", "gamma", "tau", "theta_cpg", "delta_sdc", "delta_fv")

# One row per domain: (name, alpha, beta, gamma, tau, theta_cpg, delta_sdc, delta_fv).
_PRESET_ROWS = (
    ("general",       0.50, 0.25, 0.25, 0.80, 3.5, 0.72, 0.15),
    ("medical",       0.45, 0.15, 0.40, 0.35, 5.0, 0.75, 0.10),
    ("legal",         0.35, 0.30, 0.35, 0.40, 4.5, 0.72, 0.15),
    ("financial",     0.45, 0.25, 0.30, 0.50, 3.5, 0.70, 0.20),
    ("scientific",    0.40, 0.20, 0.40, 0.30, 4.0, 0.76, 0.15),
    ("code",          0.30, 0.45, 0.25, 0.60, 3.5, 0.68, 0.20),
    ("cybersecurity", 0.35, 0.30, 0.35, 0.45, 4.0, 0.72, 0.15),
    ("educational",   0.55, 0.20, 0.25, 0.65, 3.0, 0.65, 0.20),
    ("historical",    0.45, 0.20, 0.35, 0.90, 3.0, 0.65, 0.20),
    ("creative",      0.65, 0.20, 0.15, 1.20, 2.5, 0.55, 0.25),
)

# Domain name -> parameter dict, in the same order as the rows above.
DOMAIN_PRESETS = {
    name: dict(zip(_PRESET_FIELDS, values))
    for name, *values in _PRESET_ROWS
}
30
+
31
+ # ─── Causal Feature Detection ──────────────────────────────────────────────────
32
# Words and phrases that signal an explicit cause/effect link.
CAUSAL_CONNECTIVES = [
    "because", "since", "as", "therefore", "thus", "hence", "consequently",
    "accordingly", "owing to", "due to", "because of", "as a result",
    "results in", "leads to", "causes", "enables", "triggers", "produces",
    "brings about", "is responsible for", "contributes to", "stems from",
    "arises from", "follows from", "so that", "thereby", "given that",
]

# Base-form verbs counted as causal when they appear as a whole word.
CAUSAL_VERBS = [
    "cause", "enable", "trigger", "produce", "generate", "induce", "drive",
    "lead", "result", "create", "allow", "force", "make", "bring", "spark",
    "initiate", "originate", "stem", "arise", "follow", "influence", "affect",
    "determine", "contribute", "prevent", "inhibit",
]

# Words that indicate temporal ordering.
TEMPORAL_MARKERS = [
    "before", "after", "then", "subsequently", "previously", "first",
    "finally", "later", "earlier", "following", "preceding", "once", "until",
]

# Whole-word / whole-phrase matchers for the connectives, compiled once.
# A plain substring test would count "as" inside "was", "has", "class", ...
_CONNECTIVE_PATTERNS = [
    re.compile(r"\b" + re.escape(connective) + r"\b")
    for connective in CAUSAL_CONNECTIVES
]

# Splits text into bare words so trailing punctuation ("first," / "cause.")
# does not hide a verb or temporal marker.
_WORD_PATTERN = re.compile(r"\w+")


def compute_causal_density(text: str) -> float:
    """Compute causal signal density for a text chunk.

    Three signals are combined:

    * distinct causal connectives present (whole word/phrase), per sentence;
    * distinct causal verbs present (whole word), per whitespace token;
    * distinct temporal markers present (whole word), per sentence.

    Args:
        text: The chunk or query to score. May be empty.

    Returns:
        A score in ``[0.0, 1.0]`` rounded to three decimals.
    """
    text_lower = text.lower()
    # Sentence count is approximated by terminal punctuation; never below 1
    # so the divisions stay defined for empty or unpunctuated text.
    sentences = max(1, text.count('.') + text.count('?') + text.count('!'))
    tokens = max(1, len(text_lower.split()))
    vocabulary = set(_WORD_PATTERN.findall(text_lower))

    conn_density = sum(1 for p in _CONNECTIVE_PATTERNS if p.search(text_lower)) / sentences
    verb_density = sum(1 for v in CAUSAL_VERBS if v in vocabulary) / tokens
    temp_density = sum(1 for t in TEMPORAL_MARKERS if t in vocabulary) / sentences

    # Weighted sum, clamped to at most 1.0
    score = min(1.0, (conn_density * 0.5 + verb_density * 10 + temp_density * 0.3))
    return round(score, 3)
66
+
67
def compute_semantic_density(text: str, query: str) -> float:
    """Approximate query/chunk similarity from shared words of 4+ characters.

    The score is ``0.40 + 1.5 * jaccard`` over the two lower-cased word sets,
    capped at 1.0 and rounded to three decimals. If either side has no word
    of at least four characters the score is 0.0.
    """
    significant = re.compile(r'\b\w{4,}\b')
    query_terms = set(significant.findall(query.lower()))
    text_terms = set(significant.findall(text.lower()))
    if not (query_terms and text_terms):
        return 0.0

    shared = len(query_terms & text_terms)
    combined = len(query_terms | text_terms)
    jaccard = shared / combined if combined > 0 else 0

    # Constant floor so that weakly overlapping chunks do not all score near 0
    base = 0.40
    return round(min(1.0, base + jaccard * 1.5), 3)
79
+
80
def compute_sds(causal_density_query: float, causal_density_chunk: float, tau: float) -> float:
    """Semantic Drift Score: SDS = 1 - tanh(|query - chunk| / tau).

    Returns the score clamped to [0.0, 1.0] and rounded to three decimals.
    ``tau`` is used as a divisor and must be non-zero.
    """
    gap = abs(causal_density_query - causal_density_chunk)
    raw = 1.0 - math.tanh(gap / tau)
    bounded = max(0.0, min(1.0, raw))
    return round(bounded, 3)
85
+
86
def compute_spiral_rank(tve_score: float, causal_offset: float, lambda_val: float = 0.5, n: int = 2) -> float:
    """VRC spiral rank: TVE * exp(-lambda * r) * cos(n * theta).

    ``r`` is ``1 - tve_score`` and ``theta`` is ``causal_offset`` (radians).
    The result is rounded to four decimals and is negative when the cosine
    term is negative.
    """
    radius = 1.0 - tve_score
    decay = math.exp(-lambda_val * radius)
    modulation = math.cos(n * causal_offset)
    rank = tve_score * decay * modulation
    return round(rank, 4)
92
+
93
def compute_phi(tve: float, sds: float, esr_contrib: float,
                alpha: float, beta: float, gamma: float) -> float:
    """RFG Phi score: Phi = TVE^alpha * SDS^beta * ESR_contrib^gamma.

    Each factor is floored at 0.001 before exponentiation; the product is
    rounded to four decimals.
    """
    floor = 0.001
    tve_f, sds_f, esr_f = (max(floor, value) for value in (tve, sds, esr_contrib))
    product = (tve_f ** alpha) * (sds_f ** beta) * (esr_f ** gamma)
    return round(product, 4)
100
+
101
def compute_esr(chunks_data: List[Dict]) -> float:
    """Compute the Effective Signal Ratio for a window of chunks.

    Each chunk dict must carry ``'sds'``; ``'tve_score'`` defaults to 0.5.
    Weights are the TVE scores divided by their sum. The ratio is
    ``signal / (poison + 1e-6)`` rounded to three decimals, where signal is
    the weighted SDS and poison is the weighted ``1 - SDS`` divided by the
    window size. Returns 0.0 for an empty window or a zero weight total.
    """
    if not chunks_data:
        return 0.0

    scores = [chunk.get('tve_score', 0.5) for chunk in chunks_data]
    total = sum(scores)
    if total == 0:
        return 0.0

    epsilon = 1e-6
    signal = 0
    poison_total = 0
    for chunk, score in zip(chunks_data, scores):
        weight = score / total  # linear normalisation of the TVE score
        signal += chunk['sds'] * weight
        poison_total += (1 - chunk['sds']) * weight

    poison = poison_total / max(1, len(chunks_data))
    return round(signal / (poison + epsilon), 3)
114
+
115
def run_vortexrag_pipeline(query: str, chunks: List[str], domain: str) -> Dict:
    """
    Run the full VORTEXRAG 7-layer pipeline and return trace.
    Uses deterministic scoring without external models.

    Args:
        query: The user question; only its causal density is used for scoring
            besides the word overlap computed per chunk.
        chunks: Candidate document chunks, scored in the given order. The
            chunk index becomes its ``id`` in the trace.
        domain: Key into DOMAIN_PRESETS; unknown names fall back to the
            "general" preset.

    Returns:
        A dict with one entry per layer: "L1_TVE", "L2_VRC", "L3_SDC",
        "L4_CPG", "L5_RFG", "L6_CCB" and "L7_FV".

    Note:
        The per-chunk dicts created in layer 1 are shared by later layers and
        are enriched in place (spiral_rank, sds, phi, ...), so the entries
        under trace["L1_TVE"]["chunks"] also carry the later keys.
    """
    preset = DOMAIN_PRESETS.get(domain, DOMAIN_PRESETS["general"])
    alpha, beta, gamma = preset["alpha"], preset["beta"], preset["gamma"]
    tau, theta_cpg = preset["tau"], preset["theta_cpg"]
    delta_sdc, delta_fv = preset["delta_sdc"], preset["delta_fv"]

    trace = {}

    # ── Layer 1: TVE ────────────────────────────────────────────────────────────
    query_causal = compute_causal_density(query)
    trace["L1_TVE"] = {
        "query_causal_density": query_causal,
        "domain": domain,
        "weights": f"Ξ±={alpha}, Ξ²={beta}, Ξ³={gamma}",
        # format_pipeline_trace splits this note on the dash sequence and
        # shows only the trailing label, so the separator must stay in sync.
        "note": f"Query causal density: {query_causal:.3f} β€” {'high causal' if query_causal > 0.3 else 'moderate causal' if query_causal > 0.1 else 'low causal'} query"
    }

    # Score each chunk
    chunk_scores = []
    for i, text in enumerate(chunks):
        sem = compute_semantic_density(text, query)
        cau = compute_causal_density(text)
        # Syntactic score approximation (more '.'-separated segments -> higher),
        # capped at 1.0
        syn = min(1.0, len(text.split('.')) * 0.15 + 0.3)
        tve_score = round(alpha * sem + beta * syn + gamma * cau, 3)
        chunk_scores.append({
            "id": i,
            # Stored text is truncated to 120 characters; later previews slice this
            "text": text[:120] + "..." if len(text) > 120 else text,
            "sem": sem,
            "syn": round(syn, 3),
            "cau": cau,
            "tve_score": tve_score,
        })

    trace["L1_TVE"]["chunks"] = chunk_scores

    # ── Layer 2: VRC ────────────────────────────────────────────────────────────
    vrc_chunks = []
    for c in chunk_scores:
        # Causal-density gap between chunk and query, mapped onto [0, pi]
        causal_offset = abs(c["cau"] - query_causal) * math.pi
        spiral = compute_spiral_rank(c["tve_score"], causal_offset)
        c["spiral_rank"] = spiral
        c["causal_offset_rad"] = round(causal_offset, 3)
        # Chunks with a negative spiral rank are dropped at this layer
        c["filtered_vrc"] = spiral < 0
        if not c["filtered_vrc"]:
            vrc_chunks.append(c)

    vrc_chunks_sorted = sorted(vrc_chunks, key=lambda x: x["spiral_rank"], reverse=True)
    trace["L2_VRC"] = {
        "total_input": len(chunk_scores),
        "after_vrc": len(vrc_chunks_sorted),
        "filtered": len(chunk_scores) - len(vrc_chunks_sorted),
        # Only the five best-ranked survivors are listed in the trace
        "top_candidates": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "tve": c["tve_score"],
            "spiral_rank": c["spiral_rank"],
            "causal_offset": f"{math.degrees(c['causal_offset_rad']):.1f}Β°"
        } for c in vrc_chunks_sorted[:5]]
    }

    # ── Layer 3: SDC ────────────────────────────────────────────────────────────
    sdc_accepted = []
    sdc_rejected = []
    for c in vrc_chunks_sorted:
        sds = compute_sds(query_causal, c["cau"], tau)
        c["sds"] = sds
        # Keep the chunk only when its drift score reaches the preset threshold
        if sds >= delta_sdc:
            sdc_accepted.append(c)
        else:
            c["reject_reason"] = f"SDS={sds:.3f} < Ξ΄_SDC={delta_sdc}"
            sdc_rejected.append(c)

    trace["L3_SDC"] = {
        "tau": tau,
        "delta_sdc": delta_sdc,
        "accepted": len(sdc_accepted),
        "rejected": len(sdc_rejected),
        "rejected_chunks": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "sds": c["sds"],
            "reason": c.get("reject_reason", "")
        } for c in sdc_rejected],
        "accepted_ids": [c["id"] for c in sdc_accepted]
    }

    # ── Layer 4: CPG ────────────────────────────────────────────────────────────
    window = list(sdc_accepted)
    cpg_purge_log = []
    iterations = 0

    # Repeatedly remove the lowest-SDS chunk until the window's ESR reaches
    # theta_cpg, the window has 3 or fewer chunks, or the iteration cap is hit.
    while window:
        esr = compute_esr(window)
        if esr >= theta_cpg or len(window) <= 3:
            break
        # Find chunk with minimum SDS
        min_chunk = min(window, key=lambda c: c["sds"])
        cpg_purge_log.append({
            "iteration": iterations + 1,
            "purged_id": min_chunk["id"],
            "purged_sds": min_chunk["sds"],
            "esr_before": round(esr, 3),
            "text_preview": min_chunk["text"][:60] + "..."
        })
        window = [c for c in window if c["id"] != min_chunk["id"]]
        iterations += 1
        # Hard cap on purge rounds
        if iterations > 10:
            break

    final_esr = compute_esr(window)
    trace["L4_CPG"] = {
        "theta_cpg": theta_cpg,
        "final_esr": round(final_esr, 3),
        "window_clean": final_esr >= theta_cpg,
        "purge_steps": iterations,
        "purge_log": cpg_purge_log,
        "remaining_chunks": len(window)
    }

    # ── Layer 5: RFG ────────────────────────────────────────────────────────────
    if not window:
        # Still empty when nothing was accepted by layer 3; the remaining
        # layers then operate on empty lists.
        window = list(sdc_accepted)[:3]  # fallback

    w_sum = sum(c["tve_score"] for c in window)
    sig_sum = sum(c["sds"] * c["tve_score"] / max(w_sum, 0.001) for c in window)

    for c in window:
        w_i = c["tve_score"] / max(w_sum, 0.001)
        # Share of the window's weighted SDS contributed by this chunk
        esr_contrib = (c["sds"] * w_i) / max(sig_sum, 0.001)
        c["esr_contrib"] = round(esr_contrib, 4)
        c["phi"] = compute_phi(c["tve_score"], c["sds"], c["esr_contrib"], alpha, beta, gamma)

    # Normalise phi so the window's phi_plus values sum to (about) 1
    phi_sum = sum(c["phi"] for c in window)
    for c in window:
        c["phi_plus"] = round(c["phi"] / max(phi_sum, 0.001), 4)

    ranked = sorted(window, key=lambda x: x["phi_plus"], reverse=True)
    top_m = ranked[:8]

    trace["L5_RFG"] = {
        "ranked": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "tve": c["tve_score"],
            "sds": c["sds"],
            "phi_plus": c["phi_plus"]
        } for c in top_m]
    }

    # ── Layer 6: CCB ────────────────────────────────────────────────────────────
    # Assign causal depth based on causal density rank
    # (highest causal density gets depth 0)
    sorted_by_cau = sorted(top_m, key=lambda x: x["cau"], reverse=True)
    for depth, c in enumerate(sorted_by_cau):
        c["causal_depth"] = depth

    # Position score = phi_plus rank index * causal depth; lower comes first
    for rank_i, c in enumerate(top_m):
        c["ccb_pos"] = rank_i * c["causal_depth"]

    ordered = sorted(top_m, key=lambda x: x["ccb_pos"])
    trace["L6_CCB"] = {
        "ordered_context": [{
            "position": i,
            "chunk_id": c["id"],
            "causal_depth": c["causal_depth"],
            "ccb_pos": c["ccb_pos"],
            "text_preview": c["text"][:100] + "..."
        } for i, c in enumerate(ordered)]
    }

    # ── Layer 7: FV ─────────────────────────────────────────────────────────────
    # Simulate faithfulness verification: both metrics are derived from the
    # context scores, no answer is generated or checked here.
    context_richness = sum(c["sds"] * c["phi_plus"] for c in ordered)
    rouge_l_sim = min(0.95, 0.5 + context_richness * 0.4)
    nli_sim = min(0.98, 0.6 + context_richness * 0.35)
    delta_r = round(1.0 - rouge_l_sim * nli_sim, 3)
    accepted = delta_r <= delta_fv

    trace["L7_FV"] = {
        "delta_fv": delta_fv,
        "rouge_l": round(rouge_l_sim, 3),
        "nli_entailment": round(nli_sim, 3),
        "delta_r": delta_r,
        "accepted": accepted,
        "verdict": "βœ… ACCEPTED" if accepted else f"⚠️ RETRY (Ξ”R={delta_r} > Ξ΄_FV={delta_fv})",
        "faithfulness_score": round(1 - delta_r, 3)
    }

    return trace
308
+
309
def format_pipeline_trace(trace: Dict, query: str, domain: str) -> str:
    """Format pipeline trace as a readable markdown string.

    Args:
        trace: The dict returned by run_vortexrag_pipeline; all seven layer
            keys ("L1_TVE" ... "L7_FV") are read.
        query: The query text, echoed in the header.
        domain: Preset name, echoed in the header and summary; unknown names
            show the "general" preset's parameters.

    Returns:
        One markdown document (sections per layer plus a summary table),
        joined with newlines.
    """
    preset = DOMAIN_PRESETS.get(domain, DOMAIN_PRESETS["general"])
    lines = []

    lines.append(f"# πŸŒ€ VORTEXRAG Pipeline Trace")
    lines.append(f"**Query:** `{query}`")
    lines.append(f"**Domain Preset:** `{domain}` β€” Ο„={preset['tau']}, ΞΈ_CPG={preset['theta_cpg']}, Ξ΄_SDC={preset['delta_sdc']}")
    lines.append("")

    # L1 TVE
    tve = trace["L1_TVE"]
    lines.append(f"## Layer 1 β€” TVE (Tri-Vector Encoding)")
    lines.append(f"- Weights: {tve['weights']}")
    # Only the label after the dash separator of the stored note is shown
    lines.append(f"- Query causal density: `{tve['query_causal_density']:.3f}` β€” {tve['note'].split('β€”')[-1].strip()}")
    lines.append("")
    lines.append("| Chunk | TVE Score | Semantic | Syntactic | Causal |")
    lines.append("|-------|-----------|----------|-----------|--------|")
    for c in tve["chunks"]:
        lines.append(f"| Chunk {c['id']} | **{c['tve_score']}** | {c['sem']} | {c['syn']} | {c['cau']} |")
    lines.append("")

    # L2 VRC
    vrc = trace["L2_VRC"]
    lines.append(f"## Layer 2 β€” VRC (Vortex Retrieval Cone)")
    lines.append(f"- Input: {vrc['total_input']} chunks β†’ After spiral filtering: **{vrc['after_vrc']}** ({vrc['filtered']} filtered out)")
    lines.append("")
    lines.append("| Rank | Chunk | TVE | Spiral Rank | Causal Offset |")
    lines.append("|------|-------|-----|-------------|---------------|")
    for i, c in enumerate(vrc["top_candidates"]):
        lines.append(f"| #{i+1} | Chunk {c['id']} | {c['tve']} | **{c['spiral_rank']}** | {c['causal_offset']} |")
    lines.append("")

    # L3 SDC
    sdc = trace["L3_SDC"]
    lines.append(f"## Layer 3 β€” SDC (Semantic Drift Corrector)")
    lines.append(f"- Temperature Ο„={sdc['tau']} | Threshold Ξ΄_SDC={sdc['delta_sdc']}")
    lines.append(f"- Accepted: **{sdc['accepted']}** | Rejected: **{sdc['rejected']}** (semantic drift detected)")
    # The rejection list is only rendered when something was rejected
    if sdc["rejected_chunks"]:
        lines.append("")
        lines.append("**Rejected chunks (semantic drift):**")
        for c in sdc["rejected_chunks"]:
            lines.append(f"- Chunk {c['id']}: {c['reason']}")
            lines.append(f" > _{c['text_preview']}_")
    lines.append("")

    # L4 CPG
    cpg = trace["L4_CPG"]
    status = "βœ… CLEAN" if cpg["window_clean"] else "⚠️ PARTIALLY CLEANED"
    lines.append(f"## Layer 4 β€” CPG (Context Poison Guard)")
    lines.append(f"- Threshold ΞΈ_CPG={cpg['theta_cpg']} | Final ESR: **{cpg['final_esr']}** β†’ {status}")
    lines.append(f"- Purge steps: {cpg['purge_steps']} | Remaining: {cpg['remaining_chunks']} chunks")
    # The purge log is only rendered when at least one chunk was purged
    if cpg["purge_log"]:
        lines.append("")
        lines.append("**Purge log:**")
        for p in cpg["purge_log"]:
            lines.append(f"- Step {p['iteration']}: Removed Chunk {p['purged_id']} (SDS={p['purged_sds']}, ESR before={p['esr_before']})")
    lines.append("")

    # L5 RFG
    rfg = trace["L5_RFG"]
    lines.append(f"## Layer 5 β€” RFG (Rank Fusion Gate)")
    lines.append(f"- Multiplicative Ξ¦-score: TVE^Ξ± Γ— SDS^Ξ² Γ— ESR_contrib^Ξ³ (no-weak-link policy)")
    lines.append("")
    lines.append("| Rank | Chunk | TVE | SDS | Ξ¦+ Score |")
    lines.append("|------|-------|-----|-----|----------|")
    for i, c in enumerate(rfg["ranked"]):
        lines.append(f"| #{i+1} | Chunk {c['id']} | {c['tve']} | {c['sds']} | **{c['phi_plus']}** |")
    lines.append("")

    # L6 CCB
    ccb = trace["L6_CCB"]
    lines.append(f"## Layer 6 β€” CCB (Causal Context Builder)")
    lines.append(f"- Formula: pos = rank(Ξ¦+) Γ— causal_depth")
    lines.append(f"- Depth-0 root-cause chunks guaranteed at position 0 (exploits U-shaped LLM recall)")
    lines.append("")
    lines.append("| Context Position | Chunk | Causal Depth | CCB Position Score |")
    lines.append("|-----------------|-------|--------------|-------------------|")
    for c in ccb["ordered_context"]:
        # Mark the chunk that was assigned causal depth 0
        depth_label = " ← root cause" if c["causal_depth"] == 0 else ""
        lines.append(f"| {c['position']} | Chunk {c['chunk_id']} | depth={c['causal_depth']}{depth_label} | {c['ccb_pos']} |")
    lines.append("")

    # L7 FV
    fv = trace["L7_FV"]
    lines.append(f"## Layer 7 β€” FV (Faithfulness Verifier)")
    lines.append(f"- Ξ΄_FV={fv['delta_fv']} | ROUGE-L={fv['rouge_l']} | NLI={fv['nli_entailment']}")
    lines.append(f"- Ξ”R = 1 βˆ’ {fv['rouge_l']} Γ— {fv['nli_entailment']} = **{fv['delta_r']}**")
    lines.append(f"- **{fv['verdict']}** | Faithfulness Score: **{fv['faithfulness_score']}**")
    lines.append("")
    lines.append("---")
    # Summary table: chunk counts after each filtering layer plus the verdict
    lines.append(f"### πŸ“Š Pipeline Summary")
    lines.append(f"| Metric | Value |")
    lines.append(f"|--------|-------|")
    lines.append(f"| Domain Preset | {domain} |")
    lines.append(f"| Chunks Input | {trace['L2_VRC']['total_input']} |")
    lines.append(f"| After VRC | {trace['L2_VRC']['after_vrc']} |")
    lines.append(f"| After SDC | {trace['L3_SDC']['accepted']} |")
    lines.append(f"| After CPG | {trace['L4_CPG']['remaining_chunks']} |")
    lines.append(f"| Final Context | {len(trace['L5_RFG']['ranked'])} chunks |")
    lines.append(f"| Final ESR | {trace['L4_CPG']['final_esr']} |")
    lines.append(f"| Faithfulness Ξ”R | {trace['L7_FV']['delta_r']} |")
    lines.append(f"| Verdict | {trace['L7_FV']['verdict']} |")

    return "\n".join(lines)
414
+
415
+ # ─── Example Queries ───────────────────────────────────────────────────────────
416
# Built-in demo scenarios, keyed by the label shown in the "Load Example"
# dropdown. Each entry provides:
#   "query"  - the question to run,
#   "domain" - the DOMAIN_PRESETS key to use,
#   "chunks" - the candidate document chunks fed to the pipeline.
EXAMPLE_QUERIES = {
    "Financial (2008 Crisis)": {
        "query": "Why did the 2008 US subprime mortgage crisis transmit to global markets rather than remaining contained within US financial institutions?",
        "domain": "financial",
        "chunks": [
            "Credit default swaps (CDS) written on MBS tranches amplified counterparty exposure across 23 global systemically important banks. When MBS values collapsed, CDS counterparties faced simultaneous margin calls which caused global dollar funding markets to freeze.",
            "The 2008 recession caused unemployment to rise to 10.0% by October 2009 according to Bureau of Labor Statistics data. Many workers lost jobs and homes.",
            "The Dodd-Frank Wall Street Reform Act of 2010 introduced the Volcker Rule which restricts banks from making speculative investments. This was a policy response enacted after the crisis.",
            "Lehman Brothers Holdings Inc. filed for Chapter 11 bankruptcy protection on September 15, 2008 with $613 billion in debt. This was a major trigger event.",
            "The subprime mortgage crisis of 2007–2008 involved the collapse of mortgage-backed securities. Banks had sold these instruments globally enabling contagion to spread.",
        ]
    },
    "Medical (mRNA Vaccine)": {
        "query": "Does mRNA vaccine technology require the vaccine mRNA to enter the cell nucleus for spike protein synthesis?",
        "domain": "medical",
        "chunks": [
            "Cytoplasmic ribosomes translate the mRNA into spike protein without any nuclear involvement. The mRNA is degraded by cytoplasmic RNases within 24–72 hours after delivery.",
            "Lipid nanoparticles (LNPs) fuse with the endosomal membrane after cell uptake, releasing mRNA directly into the cytoplasm. This enables cytoplasmic translation without nuclear entry.",
            "Nuclear transcription requires RNA polymerase to synthesize mRNA from a DNA template inside the nucleus. This is a different process from mRNA vaccine translation.",
            "Reverse transcriptase enzyme converts RNA into complementary DNA. This enzyme is present in retroviruses but absent in mammalian cells unless introduced.",
            "The ribosome assembles around the mRNA start codon and synthesizes spike protein in the cytoplasm. No nuclear localization signals are present in vaccine mRNA sequences.",
        ]
    },
    "Legal (Precedent Chain)": {
        "query": "Did the precedent set in Brown v. Board of Education 1954 also apply to public universities before the Civil Rights Act of 1964?",
        "domain": "legal",
        "chunks": [
            "Cooper v. Aaron (1958): The Supreme Court unanimously held that the constitutional rights declared in Brown applied to all state institutions. This directly extended Brown to all state agencies.",
            "Sweatt v. Painter (1950) required the University of Texas Law School to admit Black students under separate but equal scrutiny. This precedent enabled university desegregation.",
            "The Civil Rights Act of 1964 prohibited discrimination based on race, color, religion, sex, or national origin in programs receiving federal funding, codifying existing constitutional requirements.",
            "Brown v. Board of Education (1954) held that separate educational facilities are inherently unequal. The decision directly addressed K-12 public schools in Topeka Kansas.",
            "The Voting Rights Act of 1965 addressed voting discrimination and is a separate legislative act from school desegregation requirements.",
        ]
    },
    "Scientific (Supernovae)": {
        "query": "What are the distinct progenitor systems distinguishing Type Ia from core-collapse Type II supernovae?",
        "domain": "scientific",
        "chunks": [
            "Type Ia supernovae originate from a carbon-oxygen white dwarf in a binary system that accretes material from a companion star until reaching the Chandrasekhar limit of 1.44 solar masses, triggering thermonuclear runaway.",
            "Type II supernovae occur when massive stars with mass greater than 8 solar masses exhaust their nuclear fuel. The iron core collapses, producing a neutron star or black hole and ejecting the outer envelope.",
            "Type Ia supernovae are used as standard candles in cosmology because their peak luminosity is uniform. This enables measurement of cosmic distances and the expansion rate of the universe.",
            "Iron photodisintegration absorbs energy in the core of massive stars, removing pressure support and causing gravitational collapse. This is the trigger mechanism for core-collapse supernovae.",
            "The Chandrasekhar limit of approximately 1.44 solar masses is the maximum mass for which electron degeneracy pressure can support a white dwarf. Exceeding this limit causes carbon ignition and complete disruption.",
        ]
    },
}
462
+
463
+ # ─── Gradio Interface ──────────────────────────────────────────────────────────
464
def process_query(query: str, domain: str, chunk_text: str, example_select: str) -> Tuple[str, str]:
    """Main processing function for Gradio interface.

    Args:
        query: Text from the query box. ``None`` is treated as empty.
        domain: Selected DOMAIN_PRESETS key; replaced by the example's own
            domain when a built-in example is selected.
        chunk_text: Raw chunk box content, chunks separated by ``---``.
            ``None`` is treated as empty.
        example_select: Dropdown value; ``"Custom Input"`` (or an empty
            value) means the query and chunk boxes are used.

    Returns:
        ``(markdown, chunks_text)``: the formatted trace or a warning/error
        message, and the chunk text to show in the "Loaded Chunks" box.
    """
    # Components may hand over None; normalise so .strip()/.split() are safe.
    query = query or ""
    chunk_text = chunk_text or ""

    using_example = bool(example_select) and example_select != "Custom Input"
    example = EXAMPLE_QUERIES.get(example_select, {}) if using_example else {}

    if example:
        # A known example overrides whatever is typed in the input boxes.
        query, domain, chunks = example["query"], example["domain"], example["chunks"]
    else:
        if not query.strip():
            return "⚠️ Please enter a query.", ""
        chunks = [c.strip() for c in chunk_text.split("---") if c.strip()]
        if not chunks and not using_example:
            return "⚠️ Please enter at least one document chunk (separate chunks with ---).", ""

    if not chunks:
        return "⚠️ No document chunks found. Use --- to separate chunks.", ""

    try:
        trace = run_vortexrag_pipeline(query, chunks, domain)
        result = format_pipeline_trace(trace, query, domain)
        return result, "\n---\n".join(chunks) if example_select != "Custom Input" else chunk_text
    except Exception as e:
        # UI boundary: surface the failure in the output panel instead of
        # letting the event handler raise.
        return f"❌ Error: {str(e)}", chunk_text
496
+
497
+
498
+ # ─── Build the App ─────────────────────────────────────────────────────────────
499
# Markdown banner rendered at the top of the app, above the tabs.
DESCRIPTION = """
# πŸŒ€ VORTEXRAG β€” Interactive Pipeline Demo

**Vector Orthogonal Resonance-Tuned EXtraction RAG** β€” A 7-layer framework for causal RAG that simultaneously eliminates Semantic Drift and Context Window Poisoning.

**Results:** EM=74.8 | F1=82.6 | Faithfulness=0.94 | +13.6 EM over Naive RAG | +7.9 EM over CRAG

πŸ“„ [Paper (Zenodo)](https://doi.org/10.5281/zenodo.20285144) &nbsp;|&nbsp; πŸ’» [GitHub](https://github.com/vignesh2027/VORTEXRAG) &nbsp;|&nbsp; 🌐 [Docs](https://vignesh2027.github.io/VORTEXRAG)
"""

# Markdown shown on the "How It Works" tab: one table row per pipeline layer.
# NOTE(review): the text below advertises 11 domain presets (including
# "customer support"), but DOMAIN_PRESETS in this file defines 10 and has no
# "customer support" entry - confirm which one is intended.
HOW_IT_WORKS = """
### How VORTEXRAG Works

| Layer | Name | What It Does | Key Formula |
|-------|------|--------------|-------------|
| 1 | **TVE** | Encodes text into 864d tri-vector (semantic+syntactic+causal) | `v = [Ξ±Β·sem; Ξ²Β·syn; Ξ³Β·cau]` |
| 2 | **VRC** | Spiral-rank candidates by causal alignment | `spiral = TVEΒ·e^{βˆ’Ξ»r}Β·cos(nΞΈ)` |
| 3 | **SDC** | Rejects semantically-drifted chunks | `SDS = 1βˆ’tanh(β€–Dβ€–/Ο„) β‰₯ 0.72` |
| 4 | **CPG** | Purges context-poisoning chunks | `ESR = Ξ£SDSΒ·w/(P+Ξ΅) β‰₯ 3.5` |
| 5 | **RFG** | Multiplicative rank fusion (no weak links) | `Ξ¦ = TVE^Ξ± Γ— SDS^Ξ² Γ— ESR^Ξ³` |
| 6 | **CCB** | Places root-cause chunks at position 0 | `pos = rank(Ξ¦+) Γ— causal_depth` |
| 7 | **FV** | Faithfulness verification gate | `Ξ”R = 1βˆ’ROUGE-LΓ—NLI ≀ 0.15` |

**11 Domain Presets:** scientific, medical, legal, financial, code, cybersecurity, educational, historical, customer support, creative, general β€” each with calibrated (Ξ±,Ξ²,Ξ³,Ο„,ΞΈ_CPG,Ξ΄_SDC,Ξ΄_FV) parameters.
"""
524
+
525
def _columns_to_table(columns: Dict[str, list]) -> Dict[str, list]:
    """Convert ``{column name: values}`` into the dict form gr.DataFrame accepts.

    The component's dict input is ``{"headers": [...], "data": [[...], ...]}``;
    a plain column mapping is not in that form, so it is transposed into rows.
    """
    return {
        "headers": list(columns),
        "data": [list(row) for row in zip(*columns.values())],
    }


# Build the UI: a banner plus four tabs (pipeline demo, explanation and
# benchmark tables, preset table, links/citation).
with gr.Blocks(title="VORTEXRAG β€” 7-Layer Causal RAG", theme=gr.themes.Soft()) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        with gr.TabItem("πŸš€ Pipeline Demo"):
            with gr.Row():
                with gr.Column(scale=1):
                    example_select = gr.Dropdown(
                        label="πŸ“š Load Example",
                        choices=["Custom Input"] + list(EXAMPLE_QUERIES.keys()),
                        value="Custom Input",
                    )
                    domain_select = gr.Dropdown(
                        label="🎯 Domain Preset",
                        choices=list(DOMAIN_PRESETS.keys()),
                        value="general",
                    )
                    query_input = gr.Textbox(
                        label="πŸ” Query",
                        placeholder="Enter your multi-hop or causal question here...",
                        lines=3,
                    )
                    chunk_input = gr.Textbox(
                        label="πŸ“„ Document Chunks (separate with ---)",
                        placeholder="Paste your document chunks here.\nSeparate each chunk with ---\n\nExample:\nThe 2008 crisis caused X.\n---\nDodd-Frank was enacted in 2010.\n---\nCDS exposure spread risk globally.",
                        lines=10,
                    )
                    run_btn = gr.Button("β–Ά Run VORTEXRAG Pipeline", variant="primary")

                with gr.Column(scale=2):
                    output = gr.Markdown(label="Pipeline Trace")
                    # Hidden sink for the second value returned by process_query
                    chunk_display = gr.Textbox(label="Loaded Chunks", lines=8, visible=False)

            run_btn.click(
                fn=process_query,
                inputs=[query_input, domain_select, chunk_input, example_select],
                outputs=[output, chunk_display]
            )

            # Selecting an example pre-fills the query and domain;
            # "Custom Input" resets them to empty / "general".
            example_select.change(
                fn=lambda x: (
                    EXAMPLE_QUERIES.get(x, {}).get("query", "") if x != "Custom Input" else "",
                    EXAMPLE_QUERIES.get(x, {}).get("domain", "general") if x != "Custom Input" else "general",
                ),
                inputs=[example_select],
                outputs=[query_input, domain_select]
            )

        with gr.TabItem("πŸ“ How It Works"):
            gr.Markdown(HOW_IT_WORKS)

            gr.Markdown("### πŸ“Š Benchmark Results")
            gr.DataFrame(
                value=_columns_to_table({
                    "System": ["Naive RAG", "BM25+Rerank", "HyDE", "CRAG", "Self-RAG", "FiD", "FLARE", "VORTEXRAG"],
                    "EM": [61.2, 59.8, 64.1, 66.9, 68.4, 63.5, 65.7, 74.8],
                    "F1": [68.4, 66.1, 71.8, 74.3, 75.9, 70.2, 72.9, 82.6],
                    "Faithfulness": [0.71, 0.69, 0.74, 0.78, 0.81, 0.73, 0.75, 0.94],
                    "SDR": ["β€”", "β€”", "12%", "31%", "35%", "8%", "14%", "61%"],
                    "Latency": ["120ms", "95ms", "340ms", "290ms", "410ms", "280ms", "320ms", "185ms"],
                }),
                label="Main Benchmark Results (NQ+HotpotQA+MuSiQue+2Wiki)",
                interactive=False,
            )

            gr.Markdown("### πŸ”¬ Layer-by-Layer Ablation")
            gr.DataFrame(
                value=_columns_to_table({
                    "Configuration": ["(A) Baseline", "(B)+TVE", "(C)+VRC", "(D)+SDC", "(E)+CPG", "(F)+RFG", "(G)+CCB", "(H)+FV [FULL]"],
                    "EM": [61.2, 65.3, 67.8, 70.4, 72.1, 73.4, 73.9, 74.8],
                    "F1": [68.4, 72.1, 74.9, 78.2, 80.3, 81.5, 82.0, 82.6],
                    "Faithfulness": [0.71, 0.75, 0.78, 0.83, 0.88, 0.90, 0.91, 0.94],
                    "Ξ”EM": ["+0", "+4.1", "+2.5", "+2.6", "+1.7", "+1.3", "+0.5", "+0.9"],
                }),
                label="Layer-by-Layer Ablation Study",
                interactive=False,
            )

        with gr.TabItem("βš™οΈ Domain Presets"):
            # Heading count is derived from the table it titles, so the two
            # cannot disagree.
            gr.Markdown(f"### {len(DOMAIN_PRESETS)} Domain Preset Parameter Vectors")
            gr.DataFrame(
                value=_columns_to_table({
                    "Domain": list(DOMAIN_PRESETS.keys()),
                    "Ξ± (semantic)": [v["alpha"] for v in DOMAIN_PRESETS.values()],
                    "Ξ² (syntactic)": [v["beta"] for v in DOMAIN_PRESETS.values()],
                    "Ξ³ (causal)": [v["gamma"] for v in DOMAIN_PRESETS.values()],
                    "Ο„ (temperature)": [v["tau"] for v in DOMAIN_PRESETS.values()],
                    "ΞΈ_CPG": [v["theta_cpg"] for v in DOMAIN_PRESETS.values()],
                    "Ξ΄_SDC": [v["delta_sdc"] for v in DOMAIN_PRESETS.values()],
                    "Ξ΄_FV": [v["delta_fv"] for v in DOMAIN_PRESETS.values()],
                }),
                label="Domain Preset Parameters",
                interactive=False,
            )

        with gr.TabItem("πŸ”— Links & Citation"):
            gr.Markdown("""
### Cite VORTEXRAG

```bibtex
@article{vignesh2026vortexrag,
title = {{VORTEXRAG}: Vector Orthogonal Resonance-Tuned EXtraction Retrieval-Augmented Generation},
author = {Vignesh L},
year = {2026},
month = {May},
url = {https://github.com/vignesh2027/VORTEXRAG},
doi = {10.5281/zenodo.20285144},
note = {Independent Research. v2.0. Open-Source Preprint.},
keywords= {RAG, Semantic Drift, Context Window Poisoning, Causal NLP}
}
```

### Links
- πŸ“„ **Paper:** https://doi.org/10.5281/zenodo.20285144
- πŸ’» **GitHub:** https://github.com/vignesh2027/VORTEXRAG
- 🌐 **Docs:** https://vignesh2027.github.io/VORTEXRAG
- πŸ†” **ORCID:** https://orcid.org/0009-0004-9777-7592

### Quick Start
```bash
git clone https://github.com/vignesh2027/VORTEXRAG
pip install -r requirements.txt
python examples/demo_gradio.py
```

### License
MIT License β€” Free for academic and commercial use.

**Author:** Vignesh L | Independent Researcher | May 2026
""")

# Start the server only when run as a script, so importing this module
# (tests, reload tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()