Spaces:
Running
Running
| """ | |
| CTX — Context Transfer Format Demo | |
| Fox Valley AI Foundation | foxfoundation.ai | |
| Thin Gradio UI around the real ctx package. | |
| """ | |
| import asyncio | |
| import time | |
| import gradio as gr | |
| # --- Real CTX imports --- | |
| from ctx.converter.pipeline import convert as ctx_convert | |
| from ctx.parser import parse as ctx_parse | |
| # --- Token counting --- | |
try:
    import tiktoken
    _ENC = tiktoken.get_encoding("cl100k_base")
except Exception:
    # tiktoken (or its encoding data) is unavailable: estimate instead.
    def count_tokens(text: str) -> int:
        """Estimate the token count as one token per four characters, minimum 1."""
        estimate = len(text) // 4
        return estimate if estimate > 1 else 1
else:
    def count_tokens(text: str) -> int:
        """Return the number of cl100k_base tokens that *text* encodes to."""
        encoded = _ENC.encode(text)
        return len(encoded)
| # --------------------------------------------------------------------------- | |
| # Conversion handler | |
| # --------------------------------------------------------------------------- | |
def convert(url: str, raw_html: str, tier: str) -> tuple:
    """Convert a URL or pasted HTML to CTX and build the demo's three outputs.

    A non-blank ``url`` takes precedence over ``raw_html``. For a URL, the page
    is downloaded here to obtain the HTML baseline used for the metrics, and
    the URL itself is passed to the CTX converter. Pasted HTML is passed to the
    converter directly with a placeholder source URL.

    Args:
        url: Page address. ``https://`` is prepended when the value does not
            already start with an ``http://`` or ``https://`` scheme.
        raw_html: HTML markup, used only when ``url`` is blank.
        tier: Extraction tier forwarded to the converter and shown in the
            metrics panel.

    Returns:
        ``(ctx_text, html_preview, metrics_html)``. When no input is given or
        anything fails, the first element is a warning message, the preview
        is empty and the metrics are the placeholder panel.
    """
    started = time.perf_counter()
    try:
        url = (url or "").strip()
        raw_html = (raw_html or "").strip()

        if url:
            # Require a complete http(s) scheme, case-insensitively. Testing
            # only for an "http" prefix would treat a host such as
            # "httpbin.org" as already having a scheme and leave it unprefixed.
            if not url.lower().startswith(("http://", "https://")):
                url = "https://" + url

            # NOTE(review): the URL is user-supplied and fetched from the
            # server; consider rejecting private/internal hosts if this runs
            # anywhere with access to non-public network resources.
            import httpx
            resp = httpx.get(
                url,
                follow_redirects=True,
                timeout=20,
                headers={"User-Agent": "Mozilla/5.0 (compatible; CTX-Demo/1.0; +https://foxfoundation.ai)"},
            )
            resp.raise_for_status()
            # Raw HTML is kept only as the baseline for the size comparison.
            html = resp.text
            ctx_output = asyncio.run(ctx_convert(url, tier=tier))
        elif raw_html:
            html = raw_html
            ctx_output = asyncio.run(
                ctx_convert(html, source_url="https://example.com", tier=tier)
            )
        else:
            return ("⚠️ Enter a URL or paste HTML to convert.", "", _empty_metrics())

        elapsed = time.perf_counter() - started

        # Count citations from the parsed document; if parsing fails, fall
        # back to counting reference markers in the text (deliberately
        # best-effort, so a parser problem never hides the conversion result).
        try:
            doc = ctx_parse(ctx_output)
            ref_count = len(doc.refs) if hasattr(doc, "refs") else 0
        except Exception:
            ref_count = ctx_output.count("§ref ")

        # Size metrics: UTF-8 bytes and tokens, before vs. after.
        html_bytes = len(html.encode("utf-8"))
        ctx_bytes = len(ctx_output.encode("utf-8"))
        html_tokens = count_tokens(html)
        ctx_tokens = count_tokens(ctx_output)
        byte_reduction = ((html_bytes - ctx_bytes) / html_bytes * 100) if html_bytes > 0 else 0
        token_reduction = ((html_tokens - ctx_tokens) / html_tokens * 100) if html_tokens > 0 else 0

        metrics = _build_metrics(
            html_bytes, ctx_bytes, html_tokens, ctx_tokens,
            byte_reduction, token_reduction, elapsed, ref_count, tier
        )

        # Only the first 8000 characters of the source HTML are shown.
        preview_limit = 8000
        html_preview = html[:preview_limit]
        if len(html) > preview_limit:
            html_preview += f"\n\n... [{len(html) - preview_limit:,} more characters truncated]"

        return (ctx_output, html_preview, metrics)
    except Exception as e:
        # UI boundary: report any failure in the output box instead of raising.
        return (f"⚠️ Error: {e}", "", _empty_metrics())
| def _empty_metrics(): | |
| return """<div style="text-align:center; padding:48px; color:#888; | |
| font-family:'JetBrains Mono',monospace; font-size:14px;"> | |
| Waiting for input... | |
| </div>""" | |
| def _build_metrics(html_bytes, ctx_bytes, html_tokens, ctx_tokens, | |
| byte_pct, token_pct, elapsed, ref_count, tier): | |
| def fmt(n): | |
| if n >= 1_000_000: | |
| return f"{n/1_000_000:.1f}M" | |
| elif n >= 1_000: | |
| return f"{n/1_000:.1f}K" | |
| return str(n) | |
| byte_bar = max(3, 100 - byte_pct) | |
| token_bar = max(3, 100 - token_pct) | |
| return f""" | |
| <div style="font-family: 'JetBrains Mono', 'Fira Code', monospace; padding: 8px 0;"> | |
| <div style="text-align:center; margin-bottom:28px;"> | |
| <div style="font-size:64px; font-weight:800; letter-spacing:-3px; | |
| background: linear-gradient(135deg, #f97316 0%, #fb923c 50%, #fbbf24 100%); | |
| -webkit-background-clip:text; -webkit-text-fill-color:transparent; | |
| line-height:1;"> | |
| {token_pct:.0f}% | |
| </div> | |
| <div style="font-size:13px; color:#a1a1aa; margin-top:4px; letter-spacing:1px; text-transform:uppercase;"> | |
| Token Reduction | |
| </div> | |
| </div> | |
| <!-- Byte comparison --> | |
| <div style="margin-bottom:20px;"> | |
| <div style="display:flex; justify-content:space-between; font-size:11px; color:#71717a; margin-bottom:6px; text-transform:uppercase; letter-spacing:0.5px;"> | |
| <span>Bytes</span> | |
| <span>−{byte_pct:.1f}%</span> | |
| </div> | |
| <div style="position:relative; height:32px; border-radius:6px; overflow:hidden; background:#27272a;"> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:100%; | |
| background:#3f3f46; border-radius:6px;"></div> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:{byte_bar}%; | |
| background: linear-gradient(90deg, #f97316, #fb923c); | |
| border-radius:6px; transition: width 0.8s ease;"></div> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:100%; | |
| display:flex; align-items:center; justify-content:space-between; padding:0 10px;"> | |
| <span style="font-size:11px; color:white; font-weight:600; text-shadow:0 1px 2px rgba(0,0,0,0.5);">{fmt(ctx_bytes)} CTX</span> | |
| <span style="font-size:11px; color:#a1a1aa;">{fmt(html_bytes)} HTML</span> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Token comparison --> | |
| <div style="margin-bottom:20px;"> | |
| <div style="display:flex; justify-content:space-between; font-size:11px; color:#71717a; margin-bottom:6px; text-transform:uppercase; letter-spacing:0.5px;"> | |
| <span>Tokens (cl100k)</span> | |
| <span>−{token_pct:.1f}%</span> | |
| </div> | |
| <div style="position:relative; height:32px; border-radius:6px; overflow:hidden; background:#27272a;"> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:100%; | |
| background:#3f3f46; border-radius:6px;"></div> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:{token_bar}%; | |
| background: linear-gradient(90deg, #3b82f6, #60a5fa); | |
| border-radius:6px; transition: width 0.8s ease;"></div> | |
| <div style="position:absolute; top:0; left:0; height:100%; width:100%; | |
| display:flex; align-items:center; justify-content:space-between; padding:0 10px;"> | |
| <span style="font-size:11px; color:white; font-weight:600; text-shadow:0 1px 2px rgba(0,0,0,0.5);">{fmt(ctx_tokens)} CTX</span> | |
| <span style="font-size:11px; color:#a1a1aa;">{fmt(html_tokens)} HTML</span> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Stats grid --> | |
| <div style="display:grid; grid-template-columns:1fr 1fr; gap:12px; margin-top:24px;"> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px; text-align:center;"> | |
| <div style="font-size:20px; font-weight:700; color:#f97316;">{fmt(html_tokens)}</div> | |
| <div style="font-size:10px; color:#71717a; margin-top:2px; text-transform:uppercase; letter-spacing:0.5px;">HTML tokens</div> | |
| </div> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px; text-align:center;"> | |
| <div style="font-size:20px; font-weight:700; color:#3b82f6;">{fmt(ctx_tokens)}</div> | |
| <div style="font-size:10px; color:#71717a; margin-top:2px; text-transform:uppercase; letter-spacing:0.5px;">CTX tokens</div> | |
| </div> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px; text-align:center;"> | |
| <div style="font-size:20px; font-weight:700; color:#22c55e;">{elapsed:.2f}s</div> | |
| <div style="font-size:10px; color:#71717a; margin-top:2px; text-transform:uppercase; letter-spacing:0.5px;">Convert time</div> | |
| </div> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px; text-align:center;"> | |
| <div style="font-size:20px; font-weight:700; color:#a78bfa;">{ref_count}</div> | |
| <div style="font-size:10px; color:#71717a; margin-top:2px; text-transform:uppercase; letter-spacing:0.5px;">Citations</div> | |
| </div> | |
| </div> | |
| <!-- Tier + Cost --> | |
| <div style="margin-top:16px; display:grid; grid-template-columns:1fr 1fr; gap:12px;"> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px;"> | |
| <div style="font-size:10px; color:#71717a; text-transform:uppercase; letter-spacing:1px; margin-bottom:6px;"> | |
| Pipeline | |
| </div> | |
| <div style="font-size:14px; color:#fbbf24; font-weight:600;"> | |
| {tier.upper()} tier | |
| </div> | |
| <div style="font-size:10px; color:#52525b; margin-top:2px;"> | |
| {"DOM rules only" if tier == "fast" else "DOM + regex NER" if tier == "smart" else "DOM + NER + VLM"} | |
| </div> | |
| </div> | |
| <div style="background:#18181b; border:1px solid #27272a; border-radius:8px; padding:14px;"> | |
| <div style="font-size:10px; color:#71717a; text-transform:uppercase; letter-spacing:1px; margin-bottom:6px;"> | |
| Cost @ $3/1M tokens | |
| </div> | |
| <div style="display:flex; justify-content:space-between; align-items:baseline;"> | |
| <span style="font-size:12px; color:#ef4444; text-decoration:line-through;">${html_tokens * 3 / 1_000_000:.4f}</span> | |
| <span style="font-size:14px; color:#22c55e; font-weight:600;">${ctx_tokens * 3 / 1_000_000:.4f}</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| # --------------------------------------------------------------------------- | |
| # Custom CSS | |
| # --------------------------------------------------------------------------- | |
# Stylesheet handed to gr.Blocks(css=...). It loads the JetBrains Mono and
# DM Sans web fonts and styles the classes used by the page: the hero header,
# the CTX output / HTML preview text areas, the convert button, the section
# labels, the metrics panel, the format example and the pipeline strip.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700;800&family=DM+Sans:ital,wght@0,400;0,500;0,600;0,700;1,400&display=swap');
.gradio-container {
font-family: 'DM Sans', -apple-system, sans-serif !important;
max-width: 1400px !important;
}
.hero-header {
text-align: center;
padding: 32px 20px 24px;
border-bottom: 1px solid #27272a;
margin-bottom: 8px;
}
.hero-header h1 {
font-family: 'JetBrains Mono', monospace !important;
font-size: 42px !important;
font-weight: 800 !important;
letter-spacing: -2px !important;
margin: 0 !important;
background: linear-gradient(135deg, #f97316 0%, #fb923c 40%, #fbbf24 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
line-height: 1.1 !important;
}
.hero-sub {
font-size: 15px;
color: #a1a1aa;
margin-top: 8px;
letter-spacing: 0.3px;
}
.hero-links {
margin-top: 14px;
font-size: 12px;
color: #71717a;
letter-spacing: 0.5px;
}
.hero-links a {
color: #f97316 !important;
text-decoration: none;
}
.hero-links a:hover {
text-decoration: underline;
}
.hero-badge {
display: inline-block;
background: #27272a;
border: 1px solid #3f3f46;
border-radius: 100px;
padding: 4px 14px;
font-size: 11px;
color: #a1a1aa;
letter-spacing: 1px;
text-transform: uppercase;
margin-bottom: 16px;
font-family: 'JetBrains Mono', monospace;
}
.ctx-output textarea {
font-family: 'JetBrains Mono', monospace !important;
font-size: 12px !important;
line-height: 1.6 !important;
background: #0a0a0a !important;
border: 1px solid #27272a !important;
}
.html-preview textarea {
font-family: 'JetBrains Mono', monospace !important;
font-size: 11px !important;
line-height: 1.5 !important;
color: #71717a !important;
background: #0a0a0a !important;
border: 1px solid #27272a !important;
}
.convert-btn {
background: linear-gradient(135deg, #f97316, #ea580c) !important;
color: white !important;
font-family: 'JetBrains Mono', monospace !important;
font-weight: 700 !important;
font-size: 14px !important;
letter-spacing: 1px !important;
text-transform: uppercase !important;
border: none !important;
border-radius: 8px !important;
padding: 12px 32px !important;
min-height: 48px !important;
transition: all 0.2s ease !important;
}
.convert-btn:hover {
transform: translateY(-1px) !important;
box-shadow: 0 4px 20px rgba(249,115,22,0.4) !important;
}
.section-label {
font-family: 'JetBrains Mono', monospace;
font-size: 10px;
letter-spacing: 1.5px;
text-transform: uppercase;
color: #52525b;
padding: 12px 0 4px;
}
.metrics-panel {
border: 1px solid #27272a;
border-radius: 12px;
padding: 20px;
background: #09090b;
}
.format-example {
background: #0a0a0a;
border: 1px solid #27272a;
border-radius: 8px;
padding: 16px;
font-family: 'JetBrains Mono', monospace;
font-size: 12px;
line-height: 1.6;
color: #d4d4d8;
white-space: pre-wrap;
margin: 12px 0;
}
.pipeline-info {
background: #0a0a0a;
border: 1px solid #27272a;
border-radius: 8px;
padding: 14px 16px;
font-family: 'JetBrains Mono', monospace;
font-size: 11px;
line-height: 1.7;
color: #71717a;
margin: 8px 0;
}
.pipeline-info span.stage {
color: #f97316;
font-weight: 600;
}
.pipeline-info span.arrow {
color: #3f3f46;
}
"""
| # --------------------------------------------------------------------------- | |
| # UI | |
| # --------------------------------------------------------------------------- | |
# Hero banner rendered at the top of the page: organisation badge, the
# "§ CTX" title, a one-line description and links to the project resources.
HEADER_HTML = """
<div class="hero-header">
<div class="hero-badge">Fox Valley AI Foundation</div>
<h1>§ CTX</h1>
<div class="hero-sub">
Context Transfer Format — the content layer between the web and AI<br/>
<strong style="color:#d4d4d8;">87–90% token reduction</strong> on real-world web pages
</div>
<div class="hero-links">
<a href="https://github.com/mtecnic/ctx" target="_blank">GitHub</a> ·
<a href="https://github.com/mtecnic/ctx/blob/main/specification.md" target="_blank">Spec v1.0</a> ·
<a href="https://foxfoundation.ai" target="_blank">foxfoundation.ai</a> ·
<a href="https://x.com/NicW_AI" target="_blank">@NicW_AI</a>
</div>
</div>
"""
# Syntax-highlighted sample of a CTX document for the "What is CTX?" column.
# The wrapper uses the `format-example` class, which CUSTOM_CSS styles with
# `white-space: pre-wrap`, so the line breaks in this literal are displayed
# as written.
FORMAT_EXAMPLE = """<div class="format-example"><span style="color:#f97316;">§doc.ctx_v1.0</span> url=example.com/article title="Article" <span style="color:#60a5fa;">†type</span>=article <span style="color:#60a5fa;">†lang</span>=en
<span style="color:#52525b;">§nav [skip]
Main menu...</span>
<span style="color:#f97316;">§content.article</span>
<span style="color:#f97316;">§1</span> Introduction
<span style="color:#f97316;">§p</span> Content with citations <span style="color:#22c55e;">[ref1]</span> preserved...
<span style="color:#f97316;">§2</span> Subsection
<span style="color:#f97316;">§p</span> Hierarchy intact, zero wasted tokens.
<span style="color:#52525b;">§footer [skip]
Copyright...</span>
<span style="color:#22c55e;">§ref</span> id=ref1 url=example.com title="Source" <span style="color:#60a5fa;">†rel</span>=related</div>"""
# Strip listing the pipeline stages in order (Fetch → Extract → Classify →
# Annotate → Normalize → Emit → CTX) with a one-line feature summary below.
PIPELINE_HTML = """<div class="pipeline-info">
<span class="stage">Fetch</span> <span class="arrow">→</span>
<span class="stage">Extract</span> <span class="arrow">→</span>
<span class="stage">Classify</span> <span class="arrow">→</span>
<span class="stage">Annotate</span> <span class="arrow">→</span>
<span class="stage">Normalize</span> <span class="arrow">→</span>
<span class="stage">Emit</span> <span class="arrow">→</span>
<span style="color:#fbbf24; font-weight:700;">§ CTX</span>
<br/><span style="color:#52525b;">Full spec-compliant pipeline · readability + BeautifulSoup DOM · inline citations · skip annotations · section nesting</span>
</div>"""
# Small self-contained HTML page (nav, article with links and a table,
# sidebar, footer) used as the pasted-HTML row of EXAMPLES.
EXAMPLE_HTML = """<html lang="en">
<head><title>Sample Article</title></head>
<body>
<nav><a href="/">Home</a> | <a href="/blog">Blog</a> | <a href="/about">About</a></nav>
<article>
<h1>Why Token Efficiency Matters</h1>
<p>Every page an LLM reads costs tokens. A typical web page contains
<a href="https://example.com/html-bloat">thousands of tokens</a> of
navigation, scripts, and styling that contribute nothing to understanding.</p>
<h2>The Problem</h2>
<p>Raw HTML wastes 80-95% of context window capacity on structural markup,
<a href="https://example.com/css">CSS classes</a>, and
<a href="https://example.com/js">JavaScript</a> that an LLM cannot execute.</p>
<h2>The Solution</h2>
<p>CTX strips everything an LLM doesn't need while preserving content, citations,
and hierarchy. The result: the same information in a fraction of the tokens.</p>
<table>
<tr><th>Format</th><th>Tokens</th><th>Savings</th></tr>
<tr><td>Raw HTML</td><td>45,000</td><td>baseline</td></tr>
<tr><td>Markdown</td><td>4,500</td><td>90%</td></tr>
<tr><td>CTX</td><td>3,400</td><td>92%</td></tr>
</table>
</article>
<aside class="sidebar"><h3>Related Posts</h3><ul><li>Post 1</li><li>Post 2</li></ul></aside>
<footer><p>© 2026 Example Inc. All rights reserved. Privacy Policy | Terms</p></footer>
</body>
</html>"""
# Rows for gr.Examples, one value per input in the order
# [url, raw HTML, tier]; each row fills either the URL or the HTML field.
EXAMPLES = [
    ["https://en.wikipedia.org/wiki/Large_language_model", "", "smart"],
    ["https://en.wikipedia.org/wiki/Shohei_Ohtani", "", "smart"],
    ["https://en.wikipedia.org/wiki/Python_(programming_language)", "", "fast"],
    ["https://en.wikipedia.org/wiki/Transformer_(deep_learning_architecture)", "", "smart"],
    ["https://news.ycombinator.com", "", "fast"],
    ["", EXAMPLE_HTML, "smart"],
]
| # --------------------------------------------------------------------------- | |
| # Build the app | |
| # --------------------------------------------------------------------------- | |
def _make_theme():
    """Build the orange/zinc Base theme, or a default Base theme if that fails."""
    try:
        return gr.themes.Base(
            primary_hue="orange",
            neutral_hue="zinc",
            font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
            font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
        )
    except Exception:
        # Any failure building the customised theme falls back to the stock one.
        return gr.themes.Base()


_theme = _make_theme()
# Page layout and event wiring.
# NOTE(review): the nesting of the layout containers below is inferred from
# the statement order; confirm it against the rendered UI.
with gr.Blocks(css=CUSTOM_CSS, theme=_theme) as demo:
    gr.HTML(HEADER_HTML)

    # Top row: inputs on the left, format explainer on the right.
    with gr.Row():
        with gr.Column(scale=7):
            gr.HTML('<div class="section-label">→ Input</div>')
            with gr.Tab("URL"):
                url_input = gr.Textbox(
                    label="Web page URL",
                    placeholder="https://en.wikipedia.org/wiki/Large_language_model",
                    lines=1,
                    max_lines=1,
                )
            with gr.Tab("Paste HTML"):
                html_input = gr.Textbox(
                    label="Raw HTML",
                    placeholder="<html>...</html>",
                    lines=6,
                    max_lines=12,
                )
            with gr.Row():
                tier_input = gr.Radio(
                    choices=["fast", "smart"],
                    value="smart",
                    label="Extraction Tier",
                    info="fast = DOM rules only (<500ms) · smart = DOM + regex NER (<1s)",
                )
                convert_btn = gr.Button(
                    "⚡ Convert to CTX",
                    elem_classes=["convert-btn"],
                    variant="primary",
                )
        with gr.Column(scale=3):
            gr.HTML('<div class="section-label">→ What is CTX?</div>')
            gr.HTML(FORMAT_EXAMPLE)
            gr.HTML(PIPELINE_HTML)

    # Results: CTX text beside the metrics panel, raw HTML in an accordion.
    gr.HTML('<div class="section-label" style="margin-top:8px;">→ Results</div>')
    with gr.Row():
        with gr.Column(scale=5):
            ctx_output = gr.Textbox(
                label="§ CTX Output",
                lines=24,
                max_lines=50,
                elem_classes=["ctx-output"],
                interactive=False,
            )
        with gr.Column(scale=2):
            metrics_output = gr.HTML(
                value=_empty_metrics(),
                label="Metrics",
                elem_classes=["metrics-panel"],
            )
    with gr.Accordion("Raw HTML (truncated)", open=False):
        html_preview = gr.Textbox(
            label="Source HTML",
            lines=12,
            max_lines=20,
            elem_classes=["html-preview"],
            interactive=False,
        )

    # The examples and both triggers share the same input components.
    _handler_inputs = [url_input, html_input, tier_input]
    _handler_outputs = [ctx_output, html_preview, metrics_output]

    gr.HTML('<div class="section-label" style="margin-top:4px;">→ Try these</div>')
    gr.Examples(
        examples=EXAMPLES,
        inputs=_handler_inputs,
        label="",
    )

    # Clicking the button and pressing Enter in the URL box run the same handler.
    for _trigger in (convert_btn.click, url_input.submit):
        _trigger(
            fn=convert,
            inputs=_handler_inputs,
            outputs=_handler_outputs,
        )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()