Spaces:
Running
Running
Add app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,656 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VORTEXRAG Interactive Demo
|
| 3 |
+
Vector Orthogonal Resonance-Tuned EXtraction RAG
|
| 4 |
+
A 7-Layer Framework for Causal Retrieval-Augmented Generation
|
| 5 |
+
|
| 6 |
+
Author: Vignesh L | DOI: 10.5281/zenodo.20285144
|
| 7 |
+
GitHub: https://github.com/vignesh2027/VORTEXRAG
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import math
|
| 12 |
+
import random
|
| 13 |
+
import json
|
| 14 |
+
import re
|
| 15 |
+
from typing import List, Dict, Tuple, Optional
|
| 16 |
+
|
| 17 |
+
# ─── Domain Presets ────────────────────────────────────────────────────────────
|
| 18 |
+
# Per-domain tuning parameters, looked up by domain name.
#   alpha, beta, gamma : weights of the semantic / syntactic / causal scores in
#                        the TVE score; also reused as exponents of the Phi score
#   tau                : temperature of the Semantic Drift Score (SDS)
#   theta_cpg          : ESR threshold at which the Context Poison Guard stops purging
#   delta_sdc          : minimum SDS for a chunk to be accepted by the SDC layer
#   delta_fv           : maximum delta_r accepted by the faithfulness verifier
DOMAIN_PRESETS = {
    "general":       {"alpha": 0.50, "beta": 0.25, "gamma": 0.25, "tau": 0.80, "theta_cpg": 3.5, "delta_sdc": 0.72, "delta_fv": 0.15},
    "medical":       {"alpha": 0.45, "beta": 0.15, "gamma": 0.40, "tau": 0.35, "theta_cpg": 5.0, "delta_sdc": 0.75, "delta_fv": 0.10},
    "legal":         {"alpha": 0.35, "beta": 0.30, "gamma": 0.35, "tau": 0.40, "theta_cpg": 4.5, "delta_sdc": 0.72, "delta_fv": 0.15},
    "financial":     {"alpha": 0.45, "beta": 0.25, "gamma": 0.30, "tau": 0.50, "theta_cpg": 3.5, "delta_sdc": 0.70, "delta_fv": 0.20},
    "scientific":    {"alpha": 0.40, "beta": 0.20, "gamma": 0.40, "tau": 0.30, "theta_cpg": 4.0, "delta_sdc": 0.76, "delta_fv": 0.15},
    "code":          {"alpha": 0.30, "beta": 0.45, "gamma": 0.25, "tau": 0.60, "theta_cpg": 3.5, "delta_sdc": 0.68, "delta_fv": 0.20},
    "cybersecurity": {"alpha": 0.35, "beta": 0.30, "gamma": 0.35, "tau": 0.45, "theta_cpg": 4.0, "delta_sdc": 0.72, "delta_fv": 0.15},
    "educational":   {"alpha": 0.55, "beta": 0.20, "gamma": 0.25, "tau": 0.65, "theta_cpg": 3.0, "delta_sdc": 0.65, "delta_fv": 0.20},
    "historical":    {"alpha": 0.45, "beta": 0.20, "gamma": 0.35, "tau": 0.90, "theta_cpg": 3.0, "delta_sdc": 0.65, "delta_fv": 0.20},
    "creative":      {"alpha": 0.65, "beta": 0.20, "gamma": 0.15, "tau": 1.20, "theta_cpg": 2.5, "delta_sdc": 0.55, "delta_fv": 0.25},
}
|
| 30 |
+
|
| 31 |
+
# ─── Causal Feature Detection ──────────────────────────────────────────────────
|
| 32 |
+
CAUSAL_CONNECTIVES = [
    "because", "since", "as", "therefore", "thus", "hence", "consequently",
    "accordingly", "owing to", "due to", "because of", "as a result",
    "results in", "leads to", "causes", "enables", "triggers", "produces",
    "brings about", "is responsible for", "contributes to", "stems from",
    "arises from", "follows from", "so that", "thereby", "given that",
]

CAUSAL_VERBS = [
    "cause", "enable", "trigger", "produce", "generate", "induce", "drive",
    "lead", "result", "create", "allow", "force", "make", "bring", "spark",
    "initiate", "originate", "stem", "arise", "follow", "influence", "affect",
    "determine", "contribute", "prevent", "inhibit",
]

TEMPORAL_MARKERS = [
    "before", "after", "then", "subsequently", "previously", "first",
    "finally", "later", "earlier", "following", "preceding", "once", "until",
]

# Whole-word / whole-phrase patterns for the connectives. A plain substring test
# would count "as" inside "was" or "class"; the word boundaries prevent that, and
# "\s+" lets multi-word connectives match across any run of whitespace.
_CONNECTIVE_PATTERNS = [
    re.compile(r"\b" + r"\s+".join(re.escape(part) for part in phrase.split()) + r"\b")
    for phrase in CAUSAL_CONNECTIVES
]

# Tokeniser that drops punctuation, so "cause." and "then," count as "cause"/"then".
_WORD_PATTERN = re.compile(r"\w+")


def compute_causal_density(text: str) -> float:
    """Compute the causal signal density of a text chunk.

    Three signals are combined:

    * distinct causal connectives present, per sentence (weight 0.5);
    * distinct causal verbs present (base forms only), per token (weight 10);
    * distinct temporal markers present, per sentence (weight 0.3).

    Sentences are approximated by counting '.', '?' and '!' characters, with a
    minimum of one. Matching is case-insensitive and on whole words/phrases.

    Args:
        text: The chunk or query to score.

    Returns:
        The combined score, capped at 1.0 and rounded to three decimals.
    """
    text_lower = text.lower()
    words = _WORD_PATTERN.findall(text_lower)
    vocabulary = set(words)
    sentences = max(1, text.count('.') + text.count('?') + text.count('!'))
    tokens = max(1, len(words))

    conn_density = sum(1 for pattern in _CONNECTIVE_PATTERNS if pattern.search(text_lower)) / sentences
    verb_density = sum(1 for verb in CAUSAL_VERBS if verb in vocabulary) / tokens
    temp_density = sum(1 for marker in TEMPORAL_MARKERS if marker in vocabulary) / sentences

    # Weighted sum, capped so the result stays in [0, 1]
    score = min(1.0, conn_density * 0.5 + verb_density * 10 + temp_density * 0.3)
    return round(score, 3)
|
| 66 |
+
|
| 67 |
+
def compute_semantic_density(text: str, query: str) -> float:
    """Estimate query/chunk similarity from shared significant words.

    "Significant" words are lower-cased runs of at least four word characters.
    The score is a fixed base of 0.40 plus 1.5 times the Jaccard overlap of the
    two word sets, capped at 1.0.

    Args:
        text: The document chunk.
        query: The user query.

    Returns:
        0.0 when either side has no significant words; otherwise a value in
        [0.4, 1.0] rounded to three decimals.
    """
    q_words = set(re.findall(r'\b\w{4,}\b', query.lower()))
    t_words = set(re.findall(r'\b\w{4,}\b', text.lower()))
    if not q_words or not t_words:
        return 0.0

    # Both sets are non-empty here, so the union is never empty.
    jaccard = len(q_words & t_words) / len(q_words | t_words)

    # Add a small base to avoid everything being too low
    base = 0.40
    return round(min(1.0, base + jaccard * 1.5), 3)
|
| 79 |
+
|
| 80 |
+
def compute_sds(causal_density_query: float, causal_density_chunk: float, tau: float) -> float:
    """Semantic Drift Score: SDS = 1 - tanh(|D| / tau).

    D is the difference between the query's and the chunk's causal density.

    Args:
        causal_density_query: Causal density of the query.
        causal_density_chunk: Causal density of the chunk.
        tau: Temperature; larger values tolerate more drift. A non-positive
            value is treated as zero tolerance instead of dividing by zero.

    Returns:
        A score in [0, 1] rounded to three decimals; 1.0 means no drift.
    """
    drift_magnitude = abs(causal_density_query - causal_density_chunk)
    if tau <= 0:
        # Limit of the formula as tau -> 0+: any drift at all scores 0.
        return 1.0 if drift_magnitude == 0 else 0.0
    sds = 1.0 - math.tanh(drift_magnitude / tau)
    return round(max(0.0, min(1.0, sds)), 3)
|
| 85 |
+
|
| 86 |
+
def compute_spiral_rank(tve_score: float, causal_offset: float, lambda_val: float = 0.5, n: int = 2) -> float:
    """VRC Spiral Rank: spiral_rank = TVE * exp(-lambda * r) * cos(n * theta).

    Args:
        tve_score: The chunk's TVE score; the radius is r = 1 - tve_score.
        causal_offset: The angle theta, in radians.
        lambda_val: Radial decay rate.
        n: Multiplier applied to the angle inside the cosine.

    Returns:
        The spiral rank rounded to four decimals. It is negative whenever
        cos(n * causal_offset) is negative.
    """
    r = 1.0 - tve_score
    radial_decay = math.exp(-lambda_val * r)
    spiral_mod = math.cos(n * causal_offset)
    return round(tve_score * radial_decay * spiral_mod, 4)
|
| 92 |
+
|
| 93 |
+
def compute_phi(tve: float, sds: float, esr_contrib: float,
                alpha: float, beta: float, gamma: float) -> float:
    """RFG Phi score: Phi = TVE^alpha * SDS^beta * ESR_contrib^gamma.

    Each factor is floored at 0.001 before exponentiation.

    Args:
        tve: TVE score of the chunk.
        sds: Semantic Drift Score of the chunk.
        esr_contrib: The chunk's contribution to the window signal.
        alpha: Exponent applied to ``tve``.
        beta: Exponent applied to ``sds``.
        gamma: Exponent applied to ``esr_contrib``.

    Returns:
        The product rounded to four decimals.
    """
    tve = max(0.001, tve)
    sds = max(0.001, sds)
    esr_contrib = max(0.001, esr_contrib)
    return round((tve ** alpha) * (sds ** beta) * (esr_contrib ** gamma), 4)
|
| 100 |
+
|
| 101 |
+
def compute_esr(chunks_data: List[Dict]) -> float:
    """Compute the Effective Signal Ratio (ESR) of a context window.

    Each chunk is weighted by its share of the window's total TVE score.
    "Signal" is the weighted sum of SDS values; "poison" is the weighted sum of
    (1 - SDS) divided by the number of chunks. ESR is signal / (poison + 1e-6).

    Args:
        chunks_data: Chunk dicts. Each must contain ``'sds'``; ``'tve_score'``
            defaults to 0.5 when absent.

    Returns:
        The ratio rounded to three decimals, or 0.0 for an empty window or a
        window whose TVE scores sum to zero.
    """
    if not chunks_data:
        return 0.0
    epsilon = 1e-6  # keeps the division finite when there is no poison at all
    tve_scores = [c.get('tve_score', 0.5) for c in chunks_data]
    w_sum = sum(tve_scores)
    if w_sum == 0:
        return 0.0
    # Linear (sum-to-one) normalisation of the TVE scores -- not a softmax
    weights = [score / w_sum for score in tve_scores]
    signal = sum(c['sds'] * w for c, w in zip(chunks_data, weights))
    poison = sum((1 - c['sds']) * w for c, w in zip(chunks_data, weights)) / len(chunks_data)
    return round(signal / (poison + epsilon), 3)
|
| 114 |
+
|
| 115 |
+
def run_vortexrag_pipeline(query: str, chunks: List[str], domain: str) -> Dict:
    """Run the full VORTEXRAG 7-layer pipeline and return its trace.

    Uses deterministic scoring without external models. The layers are:

    1. TVE - score every chunk (semantic, syntactic, causal) and combine.
    2. VRC - compute a spiral rank per chunk and drop negative ranks.
    3. SDC - compute the Semantic Drift Score and reject chunks below delta_sdc.
    4. CPG - repeatedly purge the lowest-SDS chunk until the window ESR reaches
       theta_cpg, the window has three or fewer chunks, or the purge cap is hit.
    5. RFG - rank the remaining chunks by normalised Phi score (top 8 kept).
    6. CCB - order the context by rank * causal depth.
    7. FV  - simulate faithfulness verification from the context's richness.

    Args:
        query: The user query.
        chunks: Candidate document chunks.
        domain: Key into ``DOMAIN_PRESETS``; unknown names use "general".

    Returns:
        A dict with one entry per layer: "L1_TVE", "L2_VRC", "L3_SDC",
        "L4_CPG", "L5_RFG", "L6_CCB" and "L7_FV".
    """
    preset = DOMAIN_PRESETS.get(domain, DOMAIN_PRESETS["general"])
    alpha, beta, gamma = preset["alpha"], preset["beta"], preset["gamma"]
    tau, theta_cpg = preset["tau"], preset["theta_cpg"]
    delta_sdc, delta_fv = preset["delta_sdc"], preset["delta_fv"]

    trace = {}

    # ── Layer 1: TVE ────────────────────────────────────────────────────────────
    query_causal = compute_causal_density(query)
    if query_causal > 0.3:
        causal_label = "high causal"
    elif query_causal > 0.1:
        causal_label = "moderate causal"
    else:
        causal_label = "low causal"
    trace["L1_TVE"] = {
        "query_causal_density": query_causal,
        "domain": domain,
        "weights": f"α={alpha}, β={beta}, γ={gamma}",
        # The separator character in this note is kept exactly as found:
        # format_pipeline_trace splits the note on that same character.
        "note": f"Query causal density: {query_causal:.3f} β {causal_label} query"
    }

    # Score each chunk
    chunk_scores = []
    for i, text in enumerate(chunks):
        sem = compute_semantic_density(text, query)
        cau = compute_causal_density(text)
        # Syntactic score approximation (longer, more structured text → higher)
        syn = min(1.0, len(text.split('.')) * 0.15 + 0.3)
        tve_score = round(alpha * sem + beta * syn + gamma * cau, 3)
        chunk_scores.append({
            "id": i,
            "text": text[:120] + "..." if len(text) > 120 else text,
            "sem": sem,
            "syn": round(syn, 3),
            "cau": cau,
            "tve_score": tve_score,
        })

    # The same chunk dicts are enriched in place by the later layers.
    trace["L1_TVE"]["chunks"] = chunk_scores

    # ── Layer 2: VRC ────────────────────────────────────────────────────────────
    vrc_chunks = []
    for c in chunk_scores:
        # Map the causal-density gap between chunk and query onto an angle
        causal_offset = abs(c["cau"] - query_causal) * math.pi
        spiral = compute_spiral_rank(c["tve_score"], causal_offset)
        c["spiral_rank"] = spiral
        c["causal_offset_rad"] = round(causal_offset, 3)
        c["filtered_vrc"] = spiral < 0
        if not c["filtered_vrc"]:
            vrc_chunks.append(c)

    vrc_chunks_sorted = sorted(vrc_chunks, key=lambda x: x["spiral_rank"], reverse=True)
    trace["L2_VRC"] = {
        "total_input": len(chunk_scores),
        "after_vrc": len(vrc_chunks_sorted),
        "filtered": len(chunk_scores) - len(vrc_chunks_sorted),
        "top_candidates": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "tve": c["tve_score"],
            "spiral_rank": c["spiral_rank"],
            "causal_offset": f"{math.degrees(c['causal_offset_rad']):.1f}°"
        } for c in vrc_chunks_sorted[:5]]
    }

    # ── Layer 3: SDC ────────────────────────────────────────────────────────────
    sdc_accepted = []
    sdc_rejected = []
    for c in vrc_chunks_sorted:
        sds = compute_sds(query_causal, c["cau"], tau)
        c["sds"] = sds
        if sds >= delta_sdc:
            sdc_accepted.append(c)
        else:
            c["reject_reason"] = f"SDS={sds:.3f} < δ_SDC={delta_sdc}"
            sdc_rejected.append(c)

    trace["L3_SDC"] = {
        "tau": tau,
        "delta_sdc": delta_sdc,
        "accepted": len(sdc_accepted),
        "rejected": len(sdc_rejected),
        "rejected_chunks": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "sds": c["sds"],
            "reason": c.get("reject_reason", "")
        } for c in sdc_rejected],
        "accepted_ids": [c["id"] for c in sdc_accepted]
    }

    # ── Layer 4: CPG ────────────────────────────────────────────────────────────
    window = list(sdc_accepted)
    cpg_purge_log = []
    iterations = 0

    while window:
        esr = compute_esr(window)
        if esr >= theta_cpg or len(window) <= 3:
            break
        # Find chunk with minimum SDS
        min_chunk = min(window, key=lambda c: c["sds"])
        cpg_purge_log.append({
            "iteration": iterations + 1,
            "purged_id": min_chunk["id"],
            "purged_sds": min_chunk["sds"],
            "esr_before": round(esr, 3),
            "text_preview": min_chunk["text"][:60] + "..."
        })
        window = [c for c in window if c["id"] != min_chunk["id"]]
        iterations += 1
        # Hard cap on the number of purge rounds
        if iterations > 10:
            break

    final_esr = compute_esr(window)
    trace["L4_CPG"] = {
        "theta_cpg": theta_cpg,
        "final_esr": round(final_esr, 3),
        "window_clean": final_esr >= theta_cpg,
        "purge_steps": iterations,
        "purge_log": cpg_purge_log,
        "remaining_chunks": len(window)
    }

    # ── Layer 5: RFG ────────────────────────────────────────────────────────────
    if not window:
        # NOTE(review): the purge loop never empties a non-empty window, so this
        # only runs when sdc_accepted is itself empty and the fallback adds nothing.
        window = list(sdc_accepted)[:3]  # fallback

    w_sum = sum(c["tve_score"] for c in window)
    sig_sum = sum(c["sds"] * c["tve_score"] / max(w_sum, 0.001) for c in window)

    for c in window:
        w_i = c["tve_score"] / max(w_sum, 0.001)
        esr_contrib = (c["sds"] * w_i) / max(sig_sum, 0.001)
        c["esr_contrib"] = round(esr_contrib, 4)
        c["phi"] = compute_phi(c["tve_score"], c["sds"], c["esr_contrib"], alpha, beta, gamma)

    # Normalise Phi over the window so the scores are comparable shares
    phi_sum = sum(c["phi"] for c in window)
    for c in window:
        c["phi_plus"] = round(c["phi"] / max(phi_sum, 0.001), 4)

    ranked = sorted(window, key=lambda x: x["phi_plus"], reverse=True)
    top_m = ranked[:8]

    trace["L5_RFG"] = {
        "ranked": [{
            "id": c["id"],
            "text_preview": c["text"][:80] + "...",
            "tve": c["tve_score"],
            "sds": c["sds"],
            "phi_plus": c["phi_plus"]
        } for c in top_m]
    }

    # ── Layer 6: CCB ────────────────────────────────────────────────────────────
    # Assign causal depth based on causal density rank
    sorted_by_cau = sorted(top_m, key=lambda x: x["cau"], reverse=True)
    for depth, c in enumerate(sorted_by_cau):
        c["causal_depth"] = depth

    for rank_i, c in enumerate(top_m):
        c["ccb_pos"] = rank_i * c["causal_depth"]

    ordered = sorted(top_m, key=lambda x: x["ccb_pos"])
    trace["L6_CCB"] = {
        "ordered_context": [{
            "position": i,
            "chunk_id": c["id"],
            "causal_depth": c["causal_depth"],
            "ccb_pos": c["ccb_pos"],
            "text_preview": c["text"][:100] + "..."
        } for i, c in enumerate(ordered)]
    }

    # ── Layer 7: FV ─────────────────────────────────────────────────────────────
    # Simulate faithfulness verification
    context_richness = sum(c["sds"] * c["phi_plus"] for c in ordered)
    rouge_l_sim = min(0.95, 0.5 + context_richness * 0.4)
    nli_sim = min(0.98, 0.6 + context_richness * 0.35)
    delta_r = round(1.0 - rouge_l_sim * nli_sim, 3)
    accepted = delta_r <= delta_fv

    trace["L7_FV"] = {
        "delta_fv": delta_fv,
        "rouge_l": round(rouge_l_sim, 3),
        "nli_entailment": round(nli_sim, 3),
        "delta_r": delta_r,
        "accepted": accepted,
        "verdict": "✅ ACCEPTED" if accepted else f"⚠️ RETRY (ΔR={delta_r} > δ_FV={delta_fv})",
        "faithfulness_score": round(1 - delta_r, 3)
    }

    return trace
|
| 308 |
+
|
| 309 |
+
def format_pipeline_trace(trace: Dict, query: str, domain: str) -> str:
    """Format a pipeline trace as a readable Markdown string.

    Args:
        trace: The dict returned by ``run_vortexrag_pipeline``; all seven layer
            entries ("L1_TVE" through "L7_FV") are read.
        query: The query that was run, echoed in the header.
        domain: The preset name, echoed in the header and summary; unknown
            names display the "general" preset's parameters.

    Returns:
        The report: one section per layer followed by a summary table, joined
        with newlines.
    """
    preset = DOMAIN_PRESETS.get(domain, DOMAIN_PRESETS["general"])
    lines = []

    lines.append("# 🌀 VORTEXRAG Pipeline Trace")
    lines.append(f"**Query:** `{query}`")
    lines.append(f"**Domain Preset:** `{domain}` → τ={preset['tau']}, θ_CPG={preset['theta_cpg']}, δ_SDC={preset['delta_sdc']}")
    lines.append("")

    # L1 TVE
    tve = trace["L1_TVE"]
    # The note ends with "<level> causal query". Pull that label out by pattern
    # rather than by splitting on the note's separator character, so this keeps
    # working whatever separator the note was written with.
    label_match = re.search(r"\w+ causal query\s*$", tve["note"])
    causal_label = label_match.group(0).strip() if label_match else tve["note"].strip()
    lines.append("## Layer 1 — TVE (Tri-Vector Encoding)")
    lines.append(f"- Weights: {tve['weights']}")
    lines.append(f"- Query causal density: `{tve['query_causal_density']:.3f}` → {causal_label}")
    lines.append("")
    lines.append("| Chunk | TVE Score | Semantic | Syntactic | Causal |")
    lines.append("|-------|-----------|----------|-----------|--------|")
    for c in tve["chunks"]:
        lines.append(f"| Chunk {c['id']} | **{c['tve_score']}** | {c['sem']} | {c['syn']} | {c['cau']} |")
    lines.append("")

    # L2 VRC
    vrc = trace["L2_VRC"]
    lines.append("## Layer 2 — VRC (Vortex Retrieval Cone)")
    lines.append(f"- Input: {vrc['total_input']} chunks → After spiral filtering: **{vrc['after_vrc']}** ({vrc['filtered']} filtered out)")
    lines.append("")
    lines.append("| Rank | Chunk | TVE | Spiral Rank | Causal Offset |")
    lines.append("|------|-------|-----|-------------|---------------|")
    for i, c in enumerate(vrc["top_candidates"]):
        lines.append(f"| #{i+1} | Chunk {c['id']} | {c['tve']} | **{c['spiral_rank']}** | {c['causal_offset']} |")
    lines.append("")

    # L3 SDC
    sdc = trace["L3_SDC"]
    lines.append("## Layer 3 — SDC (Semantic Drift Corrector)")
    lines.append(f"- Temperature τ={sdc['tau']} | Threshold δ_SDC={sdc['delta_sdc']}")
    lines.append(f"- Accepted: **{sdc['accepted']}** | Rejected: **{sdc['rejected']}** (semantic drift detected)")
    if sdc["rejected_chunks"]:
        lines.append("")
        lines.append("**Rejected chunks (semantic drift):**")
        for c in sdc["rejected_chunks"]:
            lines.append(f"- Chunk {c['id']}: {c['reason']}")
            lines.append(f" > _{c['text_preview']}_")
    lines.append("")

    # L4 CPG
    cpg = trace["L4_CPG"]
    status = "✅ CLEAN" if cpg["window_clean"] else "⚠️ PARTIALLY CLEANED"
    lines.append("## Layer 4 — CPG (Context Poison Guard)")
    lines.append(f"- Threshold θ_CPG={cpg['theta_cpg']} | Final ESR: **{cpg['final_esr']}** → {status}")
    lines.append(f"- Purge steps: {cpg['purge_steps']} | Remaining: {cpg['remaining_chunks']} chunks")
    if cpg["purge_log"]:
        lines.append("")
        lines.append("**Purge log:**")
        for p in cpg["purge_log"]:
            lines.append(f"- Step {p['iteration']}: Removed Chunk {p['purged_id']} (SDS={p['purged_sds']}, ESR before={p['esr_before']})")
    lines.append("")

    # L5 RFG
    rfg = trace["L5_RFG"]
    lines.append("## Layer 5 — RFG (Rank Fusion Gate)")
    lines.append("- Multiplicative Φ-score: TVE^α × SDS^β × ESR_contrib^γ (no-weak-link policy)")
    lines.append("")
    lines.append("| Rank | Chunk | TVE | SDS | Φ+ Score |")
    lines.append("|------|-------|-----|-----|----------|")
    for i, c in enumerate(rfg["ranked"]):
        lines.append(f"| #{i+1} | Chunk {c['id']} | {c['tve']} | {c['sds']} | **{c['phi_plus']}** |")
    lines.append("")

    # L6 CCB
    ccb = trace["L6_CCB"]
    lines.append("## Layer 6 — CCB (Causal Context Builder)")
    lines.append("- Formula: pos = rank(Φ+) × causal_depth")
    lines.append("- Depth-0 root-cause chunks guaranteed at position 0 (exploits U-shaped LLM recall)")
    lines.append("")
    lines.append("| Context Position | Chunk | Causal Depth | CCB Position Score |")
    lines.append("|-----------------|-------|--------------|-------------------|")
    for c in ccb["ordered_context"]:
        depth_label = " ← root cause" if c["causal_depth"] == 0 else ""
        lines.append(f"| {c['position']} | Chunk {c['chunk_id']} | depth={c['causal_depth']}{depth_label} | {c['ccb_pos']} |")
    lines.append("")

    # L7 FV
    fv = trace["L7_FV"]
    lines.append("## Layer 7 — FV (Faithfulness Verifier)")
    lines.append(f"- δ_FV={fv['delta_fv']} | ROUGE-L={fv['rouge_l']} | NLI={fv['nli_entailment']}")
    lines.append(f"- ΔR = 1 − {fv['rouge_l']} × {fv['nli_entailment']} = **{fv['delta_r']}**")
    lines.append(f"- **{fv['verdict']}** | Faithfulness Score: **{fv['faithfulness_score']}**")
    lines.append("")
    lines.append("---")
    lines.append("### 📊 Pipeline Summary")
    lines.append("| Metric | Value |")
    lines.append("|--------|-------|")
    lines.append(f"| Domain Preset | {domain} |")
    lines.append(f"| Chunks Input | {trace['L2_VRC']['total_input']} |")
    lines.append(f"| After VRC | {trace['L2_VRC']['after_vrc']} |")
    lines.append(f"| After SDC | {trace['L3_SDC']['accepted']} |")
    lines.append(f"| After CPG | {trace['L4_CPG']['remaining_chunks']} |")
    lines.append(f"| Final Context | {len(trace['L5_RFG']['ranked'])} chunks |")
    lines.append(f"| Final ESR | {trace['L4_CPG']['final_esr']} |")
    lines.append(f"| Faithfulness ΔR | {trace['L7_FV']['delta_r']} |")
    lines.append(f"| Verdict | {trace['L7_FV']['verdict']} |")

    return "\n".join(lines)
|
| 414 |
+
|
| 415 |
+
# ─── Example Queries ───────────────────────────────────────────────────────────
|
| 416 |
+
# Built-in demo scenarios keyed by the label shown in the example selector.
# Each entry supplies the query, the DOMAIN_PRESETS key to use, and the
# candidate chunks the pipeline will score.
EXAMPLE_QUERIES = {
    "Financial (2008 Crisis)": {
        "query": "Why did the 2008 US subprime mortgage crisis transmit to global markets rather than remaining contained within US financial institutions?",
        "domain": "financial",
        "chunks": [
            "Credit default swaps (CDS) written on MBS tranches amplified counterparty exposure across 23 global systemically important banks. When MBS values collapsed, CDS counterparties faced simultaneous margin calls which caused global dollar funding markets to freeze.",
            "The 2008 recession caused unemployment to rise to 10.0% by October 2009 according to Bureau of Labor Statistics data. Many workers lost jobs and homes.",
            "The Dodd-Frank Wall Street Reform Act of 2010 introduced the Volcker Rule which restricts banks from making speculative investments. This was a policy response enacted after the crisis.",
            "Lehman Brothers Holdings Inc. filed for Chapter 11 bankruptcy protection on September 15, 2008 with $613 billion in debt. This was a major trigger event.",
            "The subprime mortgage crisis of 2007–2008 involved the collapse of mortgage-backed securities. Banks had sold these instruments globally enabling contagion to spread.",
        ]
    },
    "Medical (mRNA Vaccine)": {
        "query": "Does mRNA vaccine technology require the vaccine mRNA to enter the cell nucleus for spike protein synthesis?",
        "domain": "medical",
        "chunks": [
            "Cytoplasmic ribosomes translate the mRNA into spike protein without any nuclear involvement. The mRNA is degraded by cytoplasmic RNases within 24–72 hours after delivery.",
            "Lipid nanoparticles (LNPs) fuse with the endosomal membrane after cell uptake, releasing mRNA directly into the cytoplasm. This enables cytoplasmic translation without nuclear entry.",
            "Nuclear transcription requires RNA polymerase to synthesize mRNA from a DNA template inside the nucleus. This is a different process from mRNA vaccine translation.",
            "Reverse transcriptase enzyme converts RNA into complementary DNA. This enzyme is present in retroviruses but absent in mammalian cells unless introduced.",
            "The ribosome assembles around the mRNA start codon and synthesizes spike protein in the cytoplasm. No nuclear localization signals are present in vaccine mRNA sequences.",
        ]
    },
    "Legal (Precedent Chain)": {
        "query": "Did the precedent set in Brown v. Board of Education 1954 also apply to public universities before the Civil Rights Act of 1964?",
        "domain": "legal",
        "chunks": [
            "Cooper v. Aaron (1958): The Supreme Court unanimously held that the constitutional rights declared in Brown applied to all state institutions. This directly extended Brown to all state agencies.",
            "Sweatt v. Painter (1950) required the University of Texas Law School to admit Black students under separate but equal scrutiny. This precedent enabled university desegregation.",
            "The Civil Rights Act of 1964 prohibited discrimination based on race, color, religion, sex, or national origin in programs receiving federal funding, codifying existing constitutional requirements.",
            "Brown v. Board of Education (1954) held that separate educational facilities are inherently unequal. The decision directly addressed K-12 public schools in Topeka Kansas.",
            "The Voting Rights Act of 1965 addressed voting discrimination and is a separate legislative act from school desegregation requirements.",
        ]
    },
    "Scientific (Supernovae)": {
        "query": "What are the distinct progenitor systems distinguishing Type Ia from core-collapse Type II supernovae?",
        "domain": "scientific",
        "chunks": [
            "Type Ia supernovae originate from a carbon-oxygen white dwarf in a binary system that accretes material from a companion star until reaching the Chandrasekhar limit of 1.44 solar masses, triggering thermonuclear runaway.",
            "Type II supernovae occur when massive stars with mass greater than 8 solar masses exhaust their nuclear fuel. The iron core collapses, producing a neutron star or black hole and ejecting the outer envelope.",
            "Type Ia supernovae are used as standard candles in cosmology because their peak luminosity is uniform. This enables measurement of cosmic distances and the expansion rate of the universe.",
            "Iron photodisintegration absorbs energy in the core of massive stars, removing pressure support and causing gravitational collapse. This is the trigger mechanism for core-collapse supernovae.",
            "The Chandrasekhar limit of approximately 1.44 solar masses is the maximum mass for which electron degeneracy pressure can support a white dwarf. Exceeding this limit causes carbon ignition and complete disruption.",
        ]
    },
}
|
| 462 |
+
|
| 463 |
+
# ─── Gradio Interface ──────────────────────────────────────────────────────────
|
| 464 |
+
def process_query(query: str, domain: str, chunk_text: str, example_select: str) -> Tuple[str, str]:
    """Run the VORTEXRAG pipeline for the Gradio "Run" button.

    Args:
        query: Question typed by the user. Ignored when a known example is
            selected, because the example supplies its own query.
        domain: Name of the domain preset. Replaced by the example's domain
            when a known example is selected.
        chunk_text: Raw textbox content; individual chunks are separated by
            ``---``. Ignored when a known example is selected.
        example_select: Value of the example dropdown. ``"Custom Input"``, an
            empty value, or a name missing from ``EXAMPLE_QUERIES`` all mean
            "use the text typed by the user".

    Returns:
        A ``(markdown, chunks_text)`` pair: the formatted pipeline trace (or a
        warning / error message) and the chunk text that was actually used.
        Validation failures return an empty string as the second element.
    """
    is_custom = not example_select or example_select == "Custom Input"
    example = None if is_custom else EXAMPLE_QUERIES.get(example_select)

    if example:
        query = example["query"]
        domain = example["domain"]
        # Copy so the pipeline can never mutate the shared example definition.
        chunks = list(example["chunks"])
        chunk_display = "\n---\n".join(chunks)
    else:
        # Manual input: either "Custom Input" or an example name we do not know.
        if not (query or "").strip():
            return "⚠️ Please enter a query.", ""
        chunk_text = chunk_text or ""
        chunks = [part.strip() for part in chunk_text.split("---") if part.strip()]
        chunk_display = chunk_text

    if not chunks:
        if is_custom:
            return "⚠️ Please enter at least one document chunk (separate chunks with ---).", ""
        return "⚠️ No document chunks found. Use --- to separate chunks.", ""

    # UI boundary: surface any pipeline failure as a message instead of
    # letting the exception escape into the Gradio event handler.
    try:
        trace = run_vortexrag_pipeline(query, chunks, domain)
        return format_pipeline_trace(trace, query, domain), chunk_display
    except Exception as exc:
        return f"❌ Error: {exc}", chunk_display
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
# ─── Build the App ────────────────────────────────────────────────────────────
|
| 499 |
+
# Markdown banner rendered at the very top of the app: title, one-line pitch,
# headline benchmark numbers and the project links.
DESCRIPTION = """
# 🌀 VORTEXRAG — Interactive Pipeline Demo

**Vector Orthogonal Resonance-Tuned EXtraction RAG** — A 7-layer framework for causal RAG that simultaneously eliminates Semantic Drift and Context Window Poisoning.

**Results:** EM=74.8 | F1=82.6 | Faithfulness=0.94 | +13.6 EM over Naive RAG | +7.9 EM over CRAG

📄 [Paper (Zenodo)](https://doi.org/10.5281/zenodo.20285144) | 💻 [GitHub](https://github.com/vignesh2027/VORTEXRAG) | 📖 [Docs](https://vignesh2027.github.io/VORTEXRAG)
"""
|
| 508 |
+
|
| 509 |
+
# Markdown for the "How It Works" tab: one table row per pipeline layer with
# its key formula, followed by the list of domain presets. The formulas use
# "‖" (U+2016) rather than ASCII "|" so they cannot break the Markdown table.
HOW_IT_WORKS = """
### How VORTEXRAG Works

| Layer | Name | What It Does | Key Formula |
|-------|------|--------------|-------------|
| 1 | **TVE** | Encodes text into 864d tri-vector (semantic+syntactic+causal) | `v = [α·sem; β·syn; γ·cau]` |
| 2 | **VRC** | Spiral-rank candidates by causal alignment | `spiral = TVE·e^{−λr}·cos(nθ)` |
| 3 | **SDC** | Rejects semantically-drifted chunks | `SDS = 1−tanh(‖D‖/τ) ≥ 0.72` |
| 4 | **CPG** | Purges context-poisoning chunks | `ESR = ΣSDS·w/(P+ε) ≥ 3.5` |
| 5 | **RFG** | Multiplicative rank fusion (no weak links) | `Φ = TVE^α × SDS^β × ESR^γ` |
| 6 | **CCB** | Places root-cause chunks at position 0 | `pos = rank(Φ+) × causal_depth` |
| 7 | **FV** | Faithfulness verification gate | `ΔR = 1−ROUGE-L×NLI ≤ 0.15` |

**11 Domain Presets:** scientific, medical, legal, financial, code, cybersecurity, educational, historical, customer support, creative, general — each with calibrated (α,β,γ,τ,θ_CPG,δ_SDC,δ_FV) parameters.
"""
|
| 524 |
+
|
| 525 |
+
def _table(columns):
    """Split a column-oriented mapping into ``(headers, rows)``.

    gr.DataFrame only understands a dict of the form
    ``{"headers": [...], "data": [...]}``; a ``{column_name: values}`` mapping
    is not interpreted as columns. Transposing into row lists and passing the
    names through ``headers=`` makes the table actually render.
    """
    return list(columns), [list(row) for row in zip(*columns.values())]


def _example_fields(name):
    """Return the ``(query, domain)`` to display when the example dropdown changes.

    "Custom Input" and unknown names clear the query and reset the domain to
    ``"general"``.
    """
    if name == "Custom Input":
        return "", "general"
    example = EXAMPLE_QUERIES.get(name, {})
    return example.get("query", ""), example.get("domain", "general")


# Kept at module level (column 0) so the Markdown is not indented by the
# surrounding layout code, which would turn it into a code block.
_CITATION_MD = """
### Cite VORTEXRAG

```bibtex
@article{vignesh2026vortexrag,
  title   = {{VORTEXRAG}: Vector Orthogonal Resonance-Tuned EXtraction Retrieval-Augmented Generation},
  author  = {Vignesh L},
  year    = {2026},
  month   = {May},
  url     = {https://github.com/vignesh2027/VORTEXRAG},
  doi     = {10.5281/zenodo.20285144},
  note    = {Independent Research. v2.0. Open-Source Preprint.},
  keywords= {RAG, Semantic Drift, Context Window Poisoning, Causal NLP}
}
```

### Links
- 📄 **Paper:** https://doi.org/10.5281/zenodo.20285144
- 💻 **GitHub:** https://github.com/vignesh2027/VORTEXRAG
- 📖 **Docs:** https://vignesh2027.github.io/VORTEXRAG
- 🆔 **ORCID:** https://orcid.org/0009-0004-9777-7592

### Quick Start
```bash
git clone https://github.com/vignesh2027/VORTEXRAG
pip install -r requirements.txt
python examples/demo_gradio.py
```

### License
MIT License — Free for academic and commercial use.

**Author:** Vignesh L | Independent Researcher | May 2026
"""

_BENCHMARK_HEADERS, _BENCHMARK_ROWS = _table({
    "System": ["Naive RAG", "BM25+Rerank", "HyDE", "CRAG", "Self-RAG", "FiD", "FLARE", "VORTEXRAG"],
    "EM": [61.2, 59.8, 64.1, 66.9, 68.4, 63.5, 65.7, 74.8],
    "F1": [68.4, 66.1, 71.8, 74.3, 75.9, 70.2, 72.9, 82.6],
    "Faithfulness": [0.71, 0.69, 0.74, 0.78, 0.81, 0.73, 0.75, 0.94],
    "SDR": ["—", "—", "12%", "31%", "35%", "8%", "14%", "61%"],
    "Latency": ["120ms", "95ms", "340ms", "290ms", "410ms", "280ms", "320ms", "185ms"],
})

_ABLATION_HEADERS, _ABLATION_ROWS = _table({
    "Configuration": ["(A) Baseline", "(B)+TVE", "(C)+VRC", "(D)+SDC", "(E)+CPG", "(F)+RFG", "(G)+CCB", "(H)+FV [FULL]"],
    "EM": [61.2, 65.3, 67.8, 70.4, 72.1, 73.4, 73.9, 74.8],
    "F1": [68.4, 72.1, 74.9, 78.2, 80.3, 81.5, 82.0, 82.6],
    "Faithfulness": [0.71, 0.75, 0.78, 0.83, 0.88, 0.90, 0.91, 0.94],
    "ΔEM": ["+0", "+4.1", "+2.5", "+2.6", "+1.7", "+1.3", "+0.5", "+0.9"],
})

# (column header, key inside each DOMAIN_PRESETS entry), in display order.
_PRESET_COLUMNS = [
    ("α (semantic)", "alpha"),
    ("β (syntactic)", "beta"),
    ("γ (causal)", "gamma"),
    ("τ (temperature)", "tau"),
    ("θ_CPG", "theta_cpg"),
    ("δ_SDC", "delta_sdc"),
    ("δ_FV", "delta_fv"),
]

with gr.Blocks(title="VORTEXRAG — 7-Layer Causal RAG", theme=gr.themes.Soft()) as demo:
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        with gr.TabItem("🚀 Pipeline Demo"):
            with gr.Row():
                # Left column: inputs.
                with gr.Column(scale=1):
                    example_select = gr.Dropdown(
                        label="📋 Load Example",
                        choices=["Custom Input"] + list(EXAMPLE_QUERIES.keys()),
                        value="Custom Input",
                    )
                    domain_select = gr.Dropdown(
                        label="🎯 Domain Preset",
                        choices=list(DOMAIN_PRESETS.keys()),
                        value="general",
                    )
                    query_input = gr.Textbox(
                        label="🔍 Query",
                        placeholder="Enter your multi-hop or causal question here...",
                        lines=3,
                    )
                    chunk_input = gr.Textbox(
                        label="📚 Document Chunks (separate with ---)",
                        placeholder="Paste your document chunks here.\nSeparate each chunk with ---\n\nExample:\nThe 2008 crisis caused X.\n---\nDodd-Frank was enacted in 2010.\n---\nCDS exposure spread risk globally.",
                        lines=10,
                    )
                    run_btn = gr.Button("▶ Run VORTEXRAG Pipeline", variant="primary")

                # Right column: results. The chunk textbox is a hidden sink for
                # the second value returned by process_query.
                with gr.Column(scale=2):
                    output = gr.Markdown(label="Pipeline Trace")
                    chunk_display = gr.Textbox(label="Loaded Chunks", lines=8, visible=False)

            run_btn.click(
                fn=process_query,
                inputs=[query_input, domain_select, chunk_input, example_select],
                outputs=[output, chunk_display],
            )

            # Picking an example pre-fills the query and domain widgets.
            example_select.change(
                fn=_example_fields,
                inputs=[example_select],
                outputs=[query_input, domain_select],
            )

        with gr.TabItem("📖 How It Works"):
            gr.Markdown(HOW_IT_WORKS)

            gr.Markdown("### 📊 Benchmark Results")
            gr.DataFrame(
                value=_BENCHMARK_ROWS,
                headers=_BENCHMARK_HEADERS,
                label="Main Benchmark Results (NQ+HotpotQA+MuSiQue+2Wiki)",
                interactive=False,
            )

            gr.Markdown("### 🔬 Layer-by-Layer Ablation")
            gr.DataFrame(
                value=_ABLATION_ROWS,
                headers=_ABLATION_HEADERS,
                label="Layer-by-Layer Ablation Study",
                interactive=False,
            )

        with gr.TabItem("⚙️ Domain Presets"):
            gr.Markdown("### 11 Domain Preset Parameter Vectors")
            gr.DataFrame(
                # One row per preset: its name followed by each parameter value.
                value=[
                    [name] + [preset[key] for _, key in _PRESET_COLUMNS]
                    for name, preset in DOMAIN_PRESETS.items()
                ],
                headers=["Domain"] + [header for header, _ in _PRESET_COLUMNS],
                label="Domain Preset Parameters",
                interactive=False,
            )

        with gr.TabItem("🔗 Links & Citation"):
            gr.Markdown(_CITATION_MD)

demo.launch()
|