"""
CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents
HuggingFace Space — Paper Demo
"""
import gradio as gr
import json
import re
import ast
import math
from collections import defaultdict
# ─────────────────────────────────────────────
# Inline CTX core (self-contained for Space)
# ─────────────────────────────────────────────
# Demo corpus: file name -> Python source text for ten small modules.
# The sources must stay parseable by ``ast.parse`` because the import graph
# and the symbol lookup are both derived from their ASTs; unparseable entries
# are silently skipped by those helpers.
# Insertion order matters: the TEMPORAL_HISTORY branch of ``ctx_retrieve``
# treats the last entries of this dict as the most recently modified files.
SAMPLE_CODEBASE = {
    "main.py": '''
import pipeline
import config

def main():
    """Entry point for the CTX demo."""
    cfg = config.load_config("config.yaml")
    pipe = pipeline.Pipeline(cfg)
    results = pipe.run(query="find all retrieval strategies")
    return results
''',
    "pipeline.py": '''
import retriever
import evaluator
import graph_builder

class Pipeline:
    """Main pipeline: query → retrieval → evaluation."""

    def __init__(self, config):
        self.config = config
        self.retriever = retriever.Retriever(config)
        self.evaluator = evaluator.Evaluator()
        self.graph = graph_builder.build_import_graph(".")

    def run(self, query):
        docs = self.retriever.retrieve(query)
        score = self.evaluator.score(docs, query)
        return {"docs": docs, "score": score}
''',
    "retriever.py": '''
import bm25_index
import symbol_index
import concept_index

class Retriever:
    """Trigger-based retrieval dispatcher."""

    def __init__(self, config):
        self.bm25 = bm25_index.BM25Index()
        self.symbols = symbol_index.SymbolIndex()
        self.concepts = concept_index.ConceptIndex()

    def retrieve(self, query, k=5):
        trigger = classify_trigger(query)
        if trigger == "EXPLICIT_SYMBOL":
            return self.symbols.search(query, k)
        elif trigger == "SEMANTIC_CONCEPT":
            return self.concepts.search(query, k)
        else:
            return self.bm25.search(query, k)
''',
    "evaluator.py": '''
import metrics

class Evaluator:
    """Computes retrieval quality metrics."""

    def score(self, docs, query):
        recall = metrics.recall_at_k(docs, query, k=5)
        token_ratio = metrics.token_efficiency(docs)
        tes = recall / math.log(1 + len(docs)) if docs else 0
        return {"recall@5": recall, "token%": token_ratio, "TES": tes}
''',
    "graph_builder.py": '''
import ast
import os
import networkx as nx

def build_import_graph(root_dir):
    """Build import dependency graph via AST parsing."""
    G = nx.DiGraph()
    for root, dirs, files in os.walk(root_dir):
        dirs[:] = [d for d in dirs if d not in ["venv", "__pycache__"]]
        for f in files:
            if f.endswith(".py"):
                path = os.path.join(root, f)
                imports = extract_imports(path)
                G.add_node(f)
                for imp in imports:
                    G.add_edge(f, imp + ".py")
    return G
''',
    "bm25_index.py": '''
from rank_bm25 import BM25Okapi

class BM25Index:
    """BM25 keyword retrieval over codebase."""

    def __init__(self):
        self.corpus = []
        self.bm25 = None

    def build(self, files):
        tokenized = [f.split() for f in files]
        self.bm25 = BM25Okapi(tokenized)

    def search(self, query, k=5):
        scores = self.bm25.get_scores(query.split())
        top_k = sorted(enumerate(scores), key=lambda x: -x[1])[:k]
        return [self.corpus[i] for i, _ in top_k]
''',
    "symbol_index.py": '''
import ast

class SymbolIndex:
    """Exact symbol (function/class) lookup via AST."""

    def __init__(self):
        self.symbols = {}

    def build(self, file_contents):
        for fname, code in file_contents.items():
            try:
                tree = ast.parse(code)
                for node in ast.walk(tree):
                    if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                        self.symbols[node.name] = fname
            except SyntaxError:
                pass

    def search(self, query, k=5):
        tokens = query.lower().split()
        matches = []
        for sym, fname in self.symbols.items():
            if any(t in sym.lower() for t in tokens):
                matches.append(fname)
        return list(set(matches))[:k]
''',
    "concept_index.py": '''
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class ConceptIndex:
    """TF-IDF semantic concept matching."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.matrix = None
        self.files = []

    def build(self, file_contents):
        self.files = list(file_contents.keys())
        corpus = list(file_contents.values())
        self.matrix = self.vectorizer.fit_transform(corpus)

    def search(self, query, k=5):
        qvec = self.vectorizer.transform([query])
        sims = cosine_similarity(qvec, self.matrix)[0]
        top_k = sorted(enumerate(sims), key=lambda x: -x[1])[:k]
        return [self.files[i] for i, _ in top_k]
''',
    "metrics.py": '''
import math

def recall_at_k(retrieved, relevant, k=5):
    """Recall@K: fraction of relevant items retrieved."""
    if not relevant:
        return 0.0
    retrieved_set = set(retrieved[:k])
    relevant_set = set(relevant) if isinstance(relevant, list) else {relevant}
    return len(retrieved_set & relevant_set) / len(relevant_set)

def token_efficiency(docs, total_tokens=10000):
    """Estimate token usage ratio."""
    est_tokens = sum(len(d.split()) * 4 for d in docs) if docs else 0
    return min(1.0, est_tokens / total_tokens)

def tes(recall, n_retrieved):
    """Trade-off Efficiency Score = Recall / ln(1 + |retrieved|)"""
    if n_retrieved == 0:
        return 0.0
    return recall / math.log(1 + n_retrieved)
''',
    "config.py": '''
import yaml

def load_config(path):
    """Load YAML configuration file."""
    try:
        with open(path) as f:
            return yaml.safe_load(f)
    except FileNotFoundError:
        return {"k": 5, "strategy": "adaptive_trigger", "max_hops": 2}
''',
}
# Regex patterns per trigger type. Each pattern that matches a query adds one
# point to its trigger's score. Dict order doubles as the tie-break order in
# ``classify_trigger`` (earlier trigger wins on equal scores).
TRIGGER_PATTERNS = {
    "EXPLICIT_SYMBOL": [
        r'\b(function|class|method|def|variable|symbol|`[^`]+`)\b',
        r'\bwhat does\s+\w+\s+do\b',
        r'\bhow does\s+\w+\s+work\b',
        # Case-sensitive on purpose: dotted references such as Pipeline.run.
        r'\b[A-Z][a-zA-Z]+\.[a-zA-Z]+\b',
        # Back-quoted identifier. The alternative inside the first pattern is
        # wrapped in \b...\b and therefore cannot match a normally quoted
        # name (there is no word boundary between a space and a backtick).
        r'`[^`]+`',
    ],
    "IMPLICIT_CONTEXT": [
        r'\b(import|depend|module|used by|calls|related to|what modules|which files)\b',
        r'\b(dependency|dependencies|transitive|downstream|upstream)\b',
    ],
    "TEMPORAL_HISTORY": [
        r'\b(recent|latest|last|previously|changed|modified|updated|history)\b',
        r'\b(before|after|since|ago|commit)\b',
    ],
    "SEMANTIC_CONCEPT": [
        r'\b(pipeline|architecture|design|pattern|flow|how|overview|explain)\b',
        r'\b(concept|idea|purpose|strategy|approach|mechanism)\b',
    ],
}


def classify_trigger(query: str) -> str:
    """Classify *query* into one of the four trigger types.

    Every pattern in ``TRIGGER_PATTERNS`` that matches contributes one point
    to its trigger; the trigger with the highest score wins. On a tie the
    trigger listed first in ``TRIGGER_PATTERNS`` is returned. When nothing
    matches, ``"SEMANTIC_CONCEPT"`` is the default.

    Args:
        query: Free-text developer query.

    Returns:
        One of ``"EXPLICIT_SYMBOL"``, ``"IMPLICIT_CONTEXT"``,
        ``"TEMPORAL_HISTORY"`` or ``"SEMANTIC_CONCEPT"``.
    """
    lowered = query.lower()
    scores = defaultdict(int)
    for trigger, patterns in TRIGGER_PATTERNS.items():
        for pat in patterns:
            # Keyword patterns are written in lowercase, so they are tried on
            # the lowercased query. The raw query is tried as well; otherwise
            # the case-sensitive ``[A-Z]...`` symbol pattern could never match.
            if re.search(pat, lowered) or re.search(pat, query):
                scores[trigger] += 1
    if not scores:
        return "SEMANTIC_CONCEPT"
    return max(scores, key=scores.get)
def build_import_graph(files: dict) -> dict:
    """Map each file to the set of sibling files it imports.

    Args:
        files: Mapping of file name (e.g. ``"a.py"``) to Python source text.

    Returns:
        A ``defaultdict(set)`` keyed by file name. Only imports whose
        top-level package, suffixed with ``".py"``, is itself a key of
        *files* produce an edge. Files that fail to parse contribute nothing.
    """
    edges = defaultdict(set)
    for source_name, source in files.items():
        try:
            module_ast = ast.parse(source)
        except SyntaxError:
            # Best effort: an unparseable file simply has no outgoing edges.
            continue
        for stmt in ast.walk(module_ast):
            if isinstance(stmt, ast.Import):
                dotted_names = [alias.name for alias in stmt.names]
            elif isinstance(stmt, ast.ImportFrom) and stmt.module:
                dotted_names = [stmt.module]
            else:
                continue
            for dotted in dotted_names:
                # Only the top-level package decides which file is targeted.
                target = dotted.partition(".")[0] + ".py"
                if target in files:
                    edges[source_name].add(target)
    return edges
def bfs_expand(seed_files: list, graph: dict, max_hops: int = 2) -> list:
    """Expand *seed_files* along *graph* edges, breadth first.

    The result is ordered by discovery: the seeds first (de-duplicated, in
    the order given), then files reached after one hop, then two hops, and
    so on. Callers that truncate the result therefore keep the seeds and the
    nearest dependencies, and the order does not depend on set iteration
    order.

    Args:
        seed_files: Starting file names.
        graph: Mapping of file name to an iterable of directly imported
            file names. Missing keys are treated as having no dependencies.
        max_hops: Maximum number of edges to follow from any seed.

    Returns:
        List of unique file names reachable within *max_hops*, seeds included.
    """
    order = list(dict.fromkeys(seed_files))
    visited = set(order)
    frontier = list(order)
    for _ in range(max_hops):
        next_frontier = []
        for f in frontier:
            # Sorted so that set-valued adjacency yields a stable order.
            for dep in sorted(graph.get(f, ())):
                if dep not in visited:
                    visited.add(dep)
                    order.append(dep)
                    next_frontier.append(dep)
        if not next_frontier:
            break  # nothing new was reached; further hops cannot add files
        frontier = next_frontier
    return order
def ctx_retrieve(query: str, files: dict, k: int = 5):
    """Retrieve up to *k* file names using the trigger-specific strategy.

    The query is classified with ``classify_trigger`` and routed as follows:

    * ``EXPLICIT_SYMBOL``: files defining a function or class whose name
      contains any whitespace-separated query token (case-insensitive);
      falls back to BM25 when no symbol matches.
    * ``IMPLICIT_CONTEXT``: top-3 BM25 seeds expanded over the import graph
      by up to two hops. The seeds are always kept ahead of the expansion.
    * ``TEMPORAL_HISTORY``: the last *k* entries of *files* (dict position is
      used as a stand-in for recency).
    * anything else (``SEMANTIC_CONCEPT``): TF-IDF cosine similarity, with a
      BM25 fallback if scikit-learn is unavailable or the vectoriser fails.

    Args:
        query: Free-text developer query.
        files: Mapping of file name to source text.
        k: Maximum number of file names to return.

    Returns:
        Tuple ``(trigger, file_names)``.
    """
    trigger = classify_trigger(query)

    if trigger == "EXPLICIT_SYMBOL":
        query_tokens = query.lower().split()
        matched = []
        for fname, code in files.items():
            try:
                tree = ast.parse(code)
            except SyntaxError:
                continue  # unparseable files cannot contribute symbols
            for node in ast.walk(tree):
                if isinstance(
                    node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
                ) and any(t in node.name.lower() for t in query_tokens):
                    matched.append(fname)
                    break  # one hit per file is enough
        # Fall back to BM25 if no symbol matched.
        if not matched:
            matched = bm25_retrieve(query, files, k)
        seeds = matched
    elif trigger == "IMPLICIT_CONTEXT":
        # The graph is only needed on this path, so it is built here.
        bm25_seeds = bm25_retrieve(query, files, k=3)
        expanded = bfs_expand(bm25_seeds, build_import_graph(files), max_hops=2)
        # Keep the seeds first so the final [:k] cut can never drop them,
        # whatever order the expansion comes back in.
        seeds = list(dict.fromkeys([*bm25_seeds, *expanded]))
    elif trigger == "TEMPORAL_HISTORY":
        # Position in the dict serves as a proxy for modification time.
        seeds = list(files.keys())[-k:]
    else:  # SEMANTIC_CONCEPT
        try:
            # Imported inside the try so a missing scikit-learn degrades to
            # BM25 instead of raising ImportError.
            from sklearn.feature_extraction.text import TfidfVectorizer
            from sklearn.metrics.pairwise import cosine_similarity

            file_names = list(files.keys())
            vec = TfidfVectorizer(max_features=500)
            mat = vec.fit_transform(list(files.values()))
            sims = cosine_similarity(vec.transform([query]), mat)[0]
            top_k = sorted(enumerate(sims), key=lambda x: -x[1])[:k]
            seeds = [file_names[i] for i, _ in top_k]
        except Exception:
            seeds = bm25_retrieve(query, files, k)

    return trigger, seeds[:k]
def bm25_retrieve(query: str, files: dict, k: int = 5) -> list:
    """Rank file names against *query* with BM25 and return the top *k*.

    Each document is the file name followed by its source, lowercased and
    split on whitespace. If anything goes wrong (``rank_bm25`` missing, empty
    corpus, ...) the first *k* file names are returned instead.
    """
    try:
        from rank_bm25 import BM25Okapi

        names = list(files.keys())
        tokenized_docs = [
            f"{fn}\n{code}".lower().split() for fn, code in files.items()
        ]
        ranker = BM25Okapi(tokenized_docs)
        doc_scores = ranker.get_scores(query.lower().split())
        # Stable sort on the negated score: ties keep corpus order.
        ranked = sorted(range(len(doc_scores)), key=lambda idx: -doc_scores[idx])
        return [names[idx] for idx in ranked[:k]]
    except Exception:
        return list(files.keys())[:k]
def full_context_retrieve(files: dict, k: int = 5) -> list:
    """Return every file name in *files*; *k* is accepted but ignored."""
    return [name for name in files.keys()]
def _relevance_hint(code: str, max_len: int = 60) -> str:
    """Return a one-line, table-safe summary of *code*."""
    hint = ""
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        tree = None
    if tree is not None:
        # Prefer the first docstring found (module, class or function).
        for node in ast.walk(tree):
            if isinstance(
                node,
                (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef),
            ):
                doc_lines = (ast.get_docstring(node) or "").strip().splitlines()
                if doc_lines:
                    hint = doc_lines[0].strip()
                    break
    if not hint:
        # No docstring available: fall back to the second non-blank,
        # non-comment line of the source.
        code_lines = [
            ln.strip()
            for ln in code.split("\n")
            if ln.strip() and not ln.strip().startswith("#")
        ]
        hint = code_lines[1] if len(code_lines) > 1 else "—"
    if len(hint) > max_len:
        hint = hint[: max_len - 3] + "..."
    # A raw pipe would split the Markdown table cell.
    return hint.replace("|", "\\|")


def run_demo(query: str, strategy: str, k: int):
    """Run one retrieval over ``SAMPLE_CODEBASE`` and render the outcome.

    Args:
        query: Free-text query from the UI. Empty, blank or ``None`` input
            short-circuits with a prompt message.
        strategy: ``"CTX (Adaptive Trigger)"``, ``"BM25"``; any other value
            selects full context (all files).
        k: Maximum number of files to retrieve. Coerced with ``int`` because
            a slider value may arrive as a float, which slicing rejects.

    Returns:
        Tuple of three Markdown strings: the retrieved-files table, the
        efficiency metrics table, and previews of the first three files.
    """
    if not query or not query.strip():
        return "Please enter a query.", "", ""

    k = int(k)
    files = SAMPLE_CODEBASE
    # Token counts are estimated as 4 tokens per whitespace-separated word.
    total_tokens = sum(len(c.split()) * 4 for c in files.values())

    if strategy == "CTX (Adaptive Trigger)":
        trigger, retrieved = ctx_retrieve(query, files, k)
        strategy_note = f"Trigger: **{trigger}**"
    elif strategy == "BM25":
        retrieved = bm25_retrieve(query, files, k)
        strategy_note = "Strategy: **BM25 keyword matching**"
    else:  # Full Context
        retrieved = full_context_retrieve(files, k)
        strategy_note = "Strategy: **Full context (all files)**"

    token_used = sum(len(files[f].split()) * 4 for f in retrieved if f in files)
    token_pct = token_used / total_tokens * 100 if total_tokens > 0 else 0
    n_retrieved = len(retrieved)
    # The demo has no ground-truth relevance labels, so the share of the
    # codebase retrieved stands in for recall in the TES formula.
    tes_val = (
        (n_retrieved / len(files)) / math.log(1 + n_retrieved)
        if n_retrieved > 0
        else 0
    )

    rows = [
        f"| {i} | `{fname}` | {_relevance_hint(files.get(fname, ''))} |\n"
        for i, fname in enumerate(retrieved, 1)
    ]
    # Blank lines separate heading, note and table so the table is parsed
    # as a table rather than as a continuation of the preceding paragraph.
    result_md = (
        f"### Retrieved Files ({n_retrieved}/{len(files)})\n\n"
        f"{strategy_note}\n\n"
        "| # | File | Relevance |\n"
        "|---|------|-----------|\n"
        + "".join(rows)
    )

    metrics_md = (
        "### Efficiency Metrics\n\n"
        "| Metric | Value |\n"
        "|--------|-------|\n"
        f"| Files retrieved | **{n_retrieved}** / {len(files)} |\n"
        f"| Token usage | **{token_pct:.1f}%** of codebase |\n"
        f"| TES (Recall/ln(1+k)) | **{tes_val:.3f}** |\n\n"
        "> **TES** = efficiency-adjusted quality. CTX target: >0.7"
    )

    preview_parts = ["### Retrieved File Contents\n\n"]
    for fname in retrieved[:3]:
        snippet = files.get(fname, "").strip()
        # Decide on the ellipsis from the same stripped text that is shown.
        ellipsis = "..." if len(snippet) > 300 else ""
        preview_parts.append(
            f"**`{fname}`**\n```python\n{snippet[:300]}{ellipsis}\n```\n\n"
        )
    preview_md = "".join(preview_parts)

    return result_md, metrics_md, preview_md
# ─────────────────────────────────────────────
# Paper results data
# ─────────────────────────────────────────────
# Markdown table of per-strategy Recall@5, token share and TES; rendered
# with gr.Markdown in the Results tab under the synthetic-benchmark heading.
SYNTHETIC_RESULTS = """
| Strategy | Recall@5 | Token% | TES |
|----------|----------|--------|-----|
| Full Context | 0.075 | 100.0% | 0.019 |
| BM25 | 0.982 | 18.7% | 0.410 |
| Dense TF-IDF | 0.973 | 21.0% | 0.406 |
| LlamaIndex | 0.972 | 20.1% | 0.405 |
| Chroma Dense | 0.829 | 19.3% | 0.346 |
| GraphRAG-lite | 0.523 | 24.0% | 0.218 |
| Hybrid Dense+CTX | 0.725 | 23.6% | 0.303 |
| **CTX (Ours)** | **0.874** | **5.2%** | **0.776** |
"""
# Markdown table of Recall@1, Recall@5 and MRR per strategy; rendered with
# gr.Markdown in the Results tab under the COIR external-benchmark heading.
COIR_RESULTS = """
| Strategy | Recall@1 | Recall@5 | MRR |
|----------|----------|----------|-----|
| Dense Embedding (MiniLM) | 0.960 | 1.000 | 0.978 |
| **Hybrid Dense+CTX** | **0.930** | **0.950** | **0.940** |
| BM25 | 0.920 | 0.980 | 0.946 |
| CTX Adaptive | 0.210 | 0.380 | 0.293 |
"""
# Markdown table of Recall@5 per trigger type for BM25, TF-IDF and CTX;
# rendered with gr.Markdown in the Results tab. "—" marks a row where no
# improvement figure is reported.
TRIGGER_RESULTS = """
| Trigger Type | BM25 | TF-IDF | CTX | Delta |
|-------------|------|--------|-----|-------|
| EXPLICIT_SYMBOL | 0.81 | 0.73 | **0.97** | +19.8% |
| SEMANTIC_CONCEPT | 0.54 | **0.68** | 0.60 | — |
| TEMPORAL_HISTORY | 0.50 | 0.50 | **1.00** | +100% |
| IMPLICIT_CONTEXT | 0.40 | 0.40 | **1.00** | +150% |
"""
# Markdown table for the ablation study; rendered with gr.Markdown in the
# Results tab. "—" in the "Removed" column marks the unablated system.
ABLATION_RESULTS = """
| Variant | Removed | Recall@5 | TES | IMPL_CONTEXT |
|---------|---------|----------|-----|--------------|
| Full CTX | — | **0.874** | **0.776** | **1.000** |
| No Graph | Import graph | 0.821 | 0.635 | 0.400 |
| No Classifier | Trigger type | 0.743 | 0.412 | 0.600 |
| Fixed-k=5 | Adaptive-k | 0.856 | 0.712 | 1.000 |
"""
# Stylesheet passed to gr.Blocks(css=...): dark theme variables plus the
# classes referenced by the HTML fragments defined in this file.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
:root {
    --primary: #4F46E5;
    --primary-light: #818CF8;
    --accent: #06B6D4;
    --bg: #0F172A;
    --surface: #1E293B;
    --surface2: #273347;
    --border: #334155;
    --text: #E2E8F0;
    --text-muted: #94A3B8;
    --success: #10B981;
    --warning: #F59E0B;
}
body, .gradio-container {
    background: var(--bg) !important;
    color: var(--text) !important;
    font-family: 'Inter', sans-serif !important;
}
.paper-hero {
    background: linear-gradient(135deg, #1e1b4b 0%, #0f172a 50%, #164e63 100%);
    border: 1px solid var(--border);
    border-radius: 16px;
    padding: 48px 40px 40px;
    margin-bottom: 8px;
    position: relative;
    overflow: hidden;
}
.paper-hero::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle at 30% 40%, rgba(79,70,229,0.15) 0%, transparent 50%),
                radial-gradient(circle at 70% 60%, rgba(6,182,212,0.1) 0%, transparent 50%);
    pointer-events: none;
}
.paper-title {
    font-size: 2.2em;
    font-weight: 700;
    background: linear-gradient(135deg, #818CF8, #38BDF8);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    margin: 0 0 8px;
    line-height: 1.2;
}
.paper-subtitle {
    font-size: 1.05em;
    color: var(--text-muted);
    margin: 0 0 20px;
    font-weight: 400;
}
.authors {
    font-size: 1em;
    color: #94A3B8;
    margin-bottom: 24px;
}
.badge-row {
    display: flex;
    gap: 10px;
    flex-wrap: wrap;
}
.badge {
    display: inline-flex;
    align-items: center;
    gap: 6px;
    padding: 6px 14px;
    border-radius: 20px;
    font-size: 0.82em;
    font-weight: 500;
    text-decoration: none;
    border: 1px solid;
    transition: all 0.2s;
}
.badge-paper { background: rgba(79,70,229,0.2); border-color: #4F46E5; color: #818CF8; }
.badge-code { background: rgba(16,185,129,0.2); border-color: #10B981; color: #34D399; }
.badge-arxiv { background: rgba(245,158,11,0.2); border-color: #F59E0B; color: #FCD34D; }
.metric-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
    gap: 16px;
    margin: 20px 0;
}
.metric-card {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 12px;
    padding: 20px;
    text-align: center;
}
.metric-value {
    font-size: 2.2em;
    font-weight: 700;
    color: var(--primary-light);
    line-height: 1;
    margin-bottom: 6px;
    font-family: 'JetBrains Mono', monospace;
}
.metric-label {
    font-size: 0.78em;
    color: var(--text-muted);
    text-transform: uppercase;
    letter-spacing: 0.08em;
}
.section-header {
    font-size: 1.3em;
    font-weight: 600;
    color: var(--text);
    border-left: 3px solid var(--primary);
    padding-left: 12px;
    margin: 24px 0 16px;
}
.abstract-box {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 12px;
    padding: 24px;
    font-size: 0.95em;
    line-height: 1.75;
    color: var(--text);
}
.highlight {
    background: rgba(79,70,229,0.15);
    border: 1px solid rgba(79,70,229,0.3);
    border-radius: 4px;
    padding: 1px 6px;
    color: var(--primary-light);
    font-family: 'JetBrains Mono', monospace;
    font-size: 0.9em;
}
.contribution-list {
    list-style: none;
    padding: 0;
}
.contribution-list li {
    padding: 10px 0 10px 28px;
    position: relative;
    border-bottom: 1px solid var(--border);
    color: var(--text);
    font-size: 0.92em;
    line-height: 1.6;
}
.contribution-list li::before {
    content: '▸';
    position: absolute;
    left: 8px;
    color: var(--primary-light);
    font-size: 0.8em;
    top: 12px;
}
.demo-section {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 12px;
    padding: 24px;
}
.bibtex-box {
    background: #0d1117;
    border: 1px solid var(--border);
    border-radius: 8px;
    padding: 20px;
    font-family: 'JetBrains Mono', monospace;
    font-size: 0.82em;
    color: #7dd3fc;
    white-space: pre;
    overflow-x: auto;
}
table { width: 100%; border-collapse: collapse; }
th { background: var(--surface2); color: var(--primary-light); padding: 10px 12px; text-align: left; font-size: 0.85em; text-transform: uppercase; letter-spacing: 0.05em; }
td { padding: 9px 12px; border-bottom: 1px solid var(--border); font-size: 0.9em; color: var(--text); }
tr:hover td { background: rgba(255,255,255,0.03); }
strong { color: #34D399; }
.tabs { border-radius: 12px; overflow: hidden; }
button.selected { background: var(--primary) !important; color: white !important; }
.gr-button-primary { background: var(--primary) !important; border: none !important; }
.gr-button-primary:hover { background: var(--primary-light) !important; }
label { color: var(--text-muted) !important; font-size: 0.85em !important; }
.gr-input, .gr-dropdown select, textarea {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    color: var(--text) !important;
    border-radius: 8px !important;
}
"""
# Hero banner (title, author line, badges) rendered with gr.HTML at the top
# of the page. The literal "<" in the p-value badge is escaped so it cannot
# be mistaken for the start of a tag.
HERO_HTML = """
<div class="paper-hero">
  <div class="paper-title">CTX: Trigger-Driven Dynamic Context Loading</div>
  <div class="paper-subtitle">for Code-Aware LLM Agents</div>
  <div class="authors">Jeawon Jang · <span style="color:#64748b">be2jay67@gmail.com</span></div>
  <div class="badge-row">
    <span class="badge badge-code">⚡ Code: github.com/jaytoone/CTX</span>
    <span class="badge badge-arxiv">📄 arXiv: Pending Endorsement (cs.IR)</span>
    <span class="badge badge-paper">📊 8 Strategies · 415 Queries · p&lt;0.05</span>
  </div>
</div>
"""
# Grid of six headline metric cards rendered with gr.HTML below the hero.
METRICS_HTML = """
<div class="metric-grid">
  <div class="metric-card">
    <div class="metric-value">0.776</div>
    <div class="metric-label">TES Score</div>
  </div>
  <div class="metric-card">
    <div class="metric-value" style="color:#34D399">1.9×</div>
    <div class="metric-label">vs BM25 Baseline</div>
  </div>
  <div class="metric-card">
    <div class="metric-value" style="color:#F59E0B">5.2%</div>
    <div class="metric-label">Token Usage</div>
  </div>
  <div class="metric-card">
    <div class="metric-value" style="color:#06B6D4">0.95</div>
    <div class="metric-label">Hybrid COIR R@5</div>
  </div>
  <div class="metric-card">
    <div class="metric-value" style="color:#EC4899">1.00</div>
    <div class="metric-label">IMPLICIT Recall@5</div>
  </div>
  <div class="metric-card">
    <div class="metric-value">415</div>
    <div class="metric-label">Total Queries</div>
  </div>
</div>
"""
# Abstract paragraph rendered with gr.HTML in the Paper tab. The literal "<"
# in the p-value is escaped so it cannot be mistaken for the start of a tag.
ABSTRACT_HTML = """
<div class="abstract-box">
Large language models suffer from <strong style="color:#F87171">context dilution</strong> when processing
extensive codebases — the <em>"Lost in the Middle"</em> problem. Standard RAG approaches treat code as flat text,
ignoring the structural dependency information in <span class="highlight">import graphs</span>.<br><br>
We present <strong>CTX</strong>, a trigger-driven dynamic context loading system that classifies developer queries
into four types — <span class="highlight">EXPLICIT_SYMBOL</span>, <span class="highlight">SEMANTIC_CONCEPT</span>,
<span class="highlight">TEMPORAL_HISTORY</span>, <span class="highlight">IMPLICIT_CONTEXT</span> — and routes each
to a specialized retrieval pipeline.<br><br>
For dependency-sensitive queries, CTX performs <strong>breadth-first traversal</strong> over the codebase import
graph, resolving transitive relationships invisible to keyword and embedding methods. Evaluated on a synthetic
benchmark (50 files, 166 queries) and three real Python codebases (968 files total, 249 queries), CTX achieves
<strong style="color:#34D399">TES 1.9× higher than BM25</strong> with only <strong style="color:#F59E0B">5.2% token usage</strong>.
Statistical significance via McNemar and Wilcoxon tests (<em>p</em>&lt;0.05) across 415 queries.
</div>
"""
# Bullet list of the four paper contributions, rendered with gr.HTML in the
# Paper tab. The literal "<" in the p-value is escaped so it cannot be
# mistaken for the start of a tag.
CONTRIBUTIONS_HTML = """
<ul class="contribution-list">
  <li><strong>Four-type trigger taxonomy</strong> — EXPLICIT_SYMBOL, SEMANTIC_CONCEPT, TEMPORAL_HISTORY, IMPLICIT_CONTEXT, each mapped to a specialized retrieval strategy enabling adaptive resource allocation.</li>
  <li><strong>Import graph traversal</strong> — BFS-based algorithm over the codebase import graph resolving transitive dependencies. Recall@5 = <strong style="color:#34D399">1.0</strong> on dependency queries vs 0.4 for BM25 — 150% improvement.</li>
  <li><strong>TES metric</strong> — Trade-off Efficiency Score = Recall@K / ln(1 + |retrieved|), unified measure of accuracy-efficiency. Pearson r = 0.87 correlation with NDCG@5 (p&lt;0.001).</li>
  <li><strong>Hybrid Dense+CTX</strong> — Two-stage pipeline combining dense neural seed selection with import graph expansion. COIR Recall@5 = 0.950 (+150% over CTX alone), validating complementary nature of semantic and structural retrieval.</li>
</ul>
"""
# BibTeX entry shown verbatim in the Citation tab's textbox.
BIBTEX = """@misc{jang2026ctx,
title = {CTX: Trigger-Driven Dynamic Context Loading
for Code-Aware LLM Agents},
author = {Jeawon Jang},
year = {2026},
note = {Preprint. Code: https://github.com/jaytoone/CTX},
url = {https://github.com/jaytoone/CTX}
}"""
# ─────────────────────────────────────────────
# Build Gradio Interface
# ─────────────────────────────────────────────
# Page layout: hero + metric cards, then five tabs (Paper, Results,
# Live Demo, Implementation, Citation). Multi-line Markdown and code strings
# are written flush-left so no stray indentation reaches the renderer.
with gr.Blocks(css=CSS, title="CTX — Trigger-Driven Dynamic Context Loading") as demo:
    gr.HTML(HERO_HTML)
    gr.HTML(METRICS_HTML)

    with gr.Tabs():
        # ── Tab 1: Paper ──────────────────────────
        with gr.Tab("📄 Paper"):
            gr.HTML('<div class="section-header">Abstract</div>')
            gr.HTML(ABSTRACT_HTML)
            gr.HTML('<div class="section-header">Contributions</div>')
            gr.HTML(CONTRIBUTIONS_HTML)
            gr.HTML('<div class="section-header">Architecture</div>')
            gr.Markdown("""
```
          Query Input
               │
               ▼
┌──────────────────────────────┐
│  Trigger Classifier          │  ← regex + keyword patterns
│  (EXPLICIT / SEMANTIC /      │
│   TEMPORAL / IMPLICIT)       │
└──────────────┬───────────────┘
               │
       ┌───────┴────────┬──────────────┬─────────────────┐
       │                │              │                 │
       ▼                ▼              ▼                 ▼
 Symbol Index     TF-IDF/Dense    History Log    Import Graph BFS
 (AST lookup)     (cosine sim)   (git history)   (transitive deps)
       │                │              │                 │
       └───────┬────────┴──────────────┴─────────────────┘
               │
    ┌──────────▼──────────────┐
    │  Adaptive-k Selection   │  ← k = f(query_type, codebase_size)
    │  (3~10 files)           │
    └──────────┬──────────────┘
               │
               ▼
          LLM Context
         (5.2% tokens)
```
""")

        # ── Tab 2: Results ────────────────────────
        with gr.Tab("📊 Results"):
            gr.HTML('<div class="section-header">Synthetic Benchmark (50 files, 166 queries)</div>')
            gr.Markdown(SYNTHETIC_RESULTS)
            gr.HTML('<div class="section-header">COIR External Benchmark (CodeSearchNet Python)</div>')
            gr.Markdown(COIR_RESULTS)
            gr.HTML('<div class="section-header">Per-Trigger-Type Recall@5 (Synthetic)</div>')
            gr.Markdown(TRIGGER_RESULTS)
            gr.HTML('<div class="section-header">Ablation Study</div>')
            gr.Markdown(ABLATION_RESULTS)
            gr.Markdown("""
**Key Findings:**

- Removing import graph → IMPLICIT_CONTEXT recall drops **60%** (1.0 → 0.4)
- Removing trigger classifier → TES drops **47%** (0.776 → 0.412)
- TES↔NDCG@5 Pearson **r = 0.87** (p < 0.001, 28 strategy-dataset pairs)
- pass@1 with MiniMax M2.5: CTX **0.265** vs Full Context **0.102** (n=49, McNemar p<0.05)
""")

        # ── Tab 3: Live Demo ──────────────────────
        with gr.Tab("🚀 Live Demo"):
            # A real layout container carries the "demo-section" class.
            # Separate gr.HTML('<div ...>') / gr.HTML('</div>') components
            # cannot wrap the widgets created between them.
            with gr.Column(elem_classes="demo-section"):
                gr.Markdown("### Try CTX on a Sample Codebase")
                gr.Markdown(
                    "The demo runs CTX retrieval on a 10-file sample Python codebase "
                    "(pipeline, retriever, evaluator, graph builder, metrics, etc.). "
                    "Enter a natural language query and compare retrieval strategies."
                )
                with gr.Row():
                    with gr.Column(scale=2):
                        query_input = gr.Textbox(
                            label="Code Query",
                            placeholder="e.g., 'what does the retriever do?' or 'find all modules that import graph_builder'",
                            lines=2,
                        )
                    with gr.Column(scale=1):
                        strategy_select = gr.Dropdown(
                            choices=["CTX (Adaptive Trigger)", "BM25", "Full Context"],
                            value="CTX (Adaptive Trigger)",
                            label="Retrieval Strategy",
                        )
                        k_slider = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="k (max files)")
                run_btn = gr.Button("Retrieve", variant="primary")
                gr.Markdown("**Example queries:**")
                gr.Examples(
                    examples=[
                        ["find all modules that depend on graph_builder", "CTX (Adaptive Trigger)", 5],
                        ["what does the Pipeline class do?", "CTX (Adaptive Trigger)", 5],
                        ["how does retrieval work?", "CTX (Adaptive Trigger)", 5],
                        ["what does evaluator import?", "CTX (Adaptive Trigger)", 5],
                        ["find all modules that depend on graph_builder", "BM25", 5],
                    ],
                    inputs=[query_input, strategy_select, k_slider],
                )
                with gr.Row():
                    with gr.Column():
                        result_out = gr.Markdown(label="Retrieved Files")
                    with gr.Column():
                        metrics_out = gr.Markdown(label="Metrics")
                preview_out = gr.Markdown(label="File Previews")
                run_btn.click(
                    fn=run_demo,
                    inputs=[query_input, strategy_select, k_slider],
                    outputs=[result_out, metrics_out, preview_out],
                )

        # ── Tab 4: Implementation ─────────────────
        with gr.Tab("🔬 Implementation"):
            gr.HTML('<div class="section-header">Core Algorithm — Trigger Classifier</div>')
            gr.Code(value='''def classify_trigger(query: str) -> str:
    """
    Classify a developer query into one of four trigger types.
    Uses regex pattern matching — lightweight, no model required.
    Returns: EXPLICIT_SYMBOL | SEMANTIC_CONCEPT | TEMPORAL_HISTORY | IMPLICIT_CONTEXT
    """
    PATTERNS = {
        "EXPLICIT_SYMBOL": [
            r"\\b(function|class|method|def|variable|`[^`]+`)\\b",
            r"\\bwhat does\\s+\\w+\\s+do\\b",
            r"\\b[A-Z][a-zA-Z]+\\.[a-zA-Z]+\\b",  # e.g. Pipeline.run
        ],
        "IMPLICIT_CONTEXT": [
            r"\\b(import|depend|module|used by|calls|related to)\\b",
            r"\\b(dependency|transitive|downstream|upstream)\\b",
        ],
        "TEMPORAL_HISTORY": [
            r"\\b(recent|latest|last|changed|modified|history)\\b",
        ],
        "SEMANTIC_CONCEPT": [
            r"\\b(pipeline|architecture|design|flow|how|overview)\\b",
        ],
    }
    scores = defaultdict(int)
    q = query.lower()
    for trigger, patterns in PATTERNS.items():
        for pat in patterns:
            if re.search(pat, q):
                scores[trigger] += 1
    return max(scores, key=scores.get) if scores else "SEMANTIC_CONCEPT"''', language="python", label="trigger_classifier.py")
            gr.HTML('<div class="section-header">Import Graph BFS — Key Differentiator vs RAG</div>')
            gr.Code(value='''def build_import_graph(root_dir: str) -> nx.DiGraph:
    """
    Parse Python AST to extract import relationships.
    Creates a directed graph: file → imported_file edges.
    O(N) in codebase size — runs in <1s on 1000-file codebases.
    """
    G = nx.DiGraph()
    for path in Path(root_dir).rglob("*.py"):
        if any(skip in path.parts for skip in ["venv", "__pycache__", ".git"]):
            continue
        try:
            tree = ast.parse(path.read_text(encoding="utf-8", errors="ignore"))
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        dep = alias.name.split(".")[0]
                        G.add_edge(str(path), dep)
                elif isinstance(node, ast.ImportFrom) and node.module:
                    G.add_edge(str(path), node.module.split(".")[0])
        except SyntaxError:
            pass
    return G

def bfs_expand(seed_files: list[str], graph: nx.DiGraph, max_hops: int = 2) -> list[str]:
    """
    BFS traversal over import graph from seed files.
    Resolves TRANSITIVE dependencies invisible to BM25/embedding methods.
    Example: query "what does the evaluator use?"
        seed: [evaluator.py]
        hop1: [metrics.py, downstream_quality.py]  ← direct imports
        hop2: [scipy, numpy, ...]                  ← transitive imports
    This is what gives CTX perfect Recall@5=1.0 on IMPLICIT_CONTEXT queries.
    """
    visited = set(seed_files)
    frontier = list(seed_files)
    for _ in range(max_hops):
        next_frontier = []
        for f in frontier:
            for neighbor in graph.successors(f):
                if neighbor not in visited:
                    visited.add(neighbor)
                    next_frontier.append(neighbor)
        frontier = next_frontier
    return list(visited)''', language="python", label="adaptive_trigger.py — BFS Import Graph")
            gr.HTML('<div class="section-header">TES Metric — Trade-off Efficiency Score</div>')
            gr.Code(value='''def tes(recall_at_k: float, n_retrieved: int) -> float:
    """
    Trade-off Efficiency Score: balances recall against context size.
    TES = Recall@K / ln(1 + |retrieved|)
    Intuition: diminishing returns of loading more files.
    - Loading 1 file: ln(2) = 0.693 penalty
    - Loading 10 files: ln(11) = 2.398 penalty
    - Loading ALL files: ln(1001) = 6.909 penalty ← Full Context collapses here
    Results:
        Strategy         Recall@5   Token%  TES
        ───────────────  ─────────  ──────  ─────
        Full Context     0.075      100.0%  0.019  ← bad: high penalty
        BM25             0.982      18.7%   0.410
        CTX (Ours)       0.874      5.2%    0.776  ← best: minimal files
    Validated: Pearson r=0.87 with NDCG@5 (p<0.001, 28 strategy-dataset pairs)
    """
    if n_retrieved == 0:
        return 0.0
    return recall_at_k / math.log(1 + n_retrieved)

def adaptive_k(trigger_type: str, codebase_size: int) -> int:
    """
    Adaptive retrieval budget based on trigger type.
    Symbol lookups need few files; dependency queries need more hops.
    """
    base = {
        "EXPLICIT_SYMBOL": 3,   # exact match → few files
        "TEMPORAL_HISTORY": 3,  # recent changes → few files
        "SEMANTIC_CONCEPT": 5,  # concept → moderate
        "IMPLICIT_CONTEXT": 7,  # graph traversal → more files
    }
    k = base.get(trigger_type, 5)
    # Scale with codebase size (log scale)
    if codebase_size > 500:
        k = min(k + 2, 10)
    return k''', language="python", label="metrics.py + adaptive_k")
            gr.Markdown("""
---

**Full source**: [github.com/jaytoone/CTX](https://github.com/jaytoone/CTX)

```bash
git clone https://github.com/jaytoone/CTX && cd CTX
pip install -r requirements.txt
python run_experiment.py --dataset-size small --strategy all
```
""")

        # ── Tab 5: Citation ───────────────────────
        with gr.Tab("📖 Citation"):
            gr.HTML('<div class="section-header">BibTeX</div>')
            gr.Textbox(value=BIBTEX, label="BibTeX", lines=10, max_lines=15)
            gr.Markdown("""
### Links

- **GitHub**: https://github.com/jaytoone/CTX
- **arXiv**: Pending (cs.IR endorsement in progress — code: HBJRI6)

### Experiment Reproducibility

```bash
git clone https://github.com/jaytoone/CTX
cd CTX
pip install -r requirements.txt
# Synthetic benchmark (all 8 strategies)
python run_experiment.py --dataset-size small --strategy all
# Real codebase evaluation
python run_experiment.py --dataset-source real --project-path /path/to/project
# COIR benchmark
python run_coir_eval.py
# LLM pass@1 evaluation (requires MINIMAX_API_KEY)
python run_llm_eval_v2.py
```
""")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()