""" Citadel Explorer -- Interactive demo for the Citadel AI Operations Platform. Showcases LLM gateway routing, HNSW vector search, ReAct agent traces, and observability dashboards. All demos use mock data; no API keys required. Source: https://github.com/dbhavery/citadel """ from __future__ import annotations import hashlib import math import random import re import time from dataclasses import dataclass, field from typing import Any import gradio as gr import matplotlib import matplotlib.pyplot as plt import numpy as np import plotly.graph_objects as go from plotly.subplots import make_subplots matplotlib.use("Agg") # --------------------------------------------------------------------------- # Shared constants # --------------------------------------------------------------------------- DARK_BG = "#0d1117" DARK_SURFACE = "#161b22" DARK_BORDER = "#30363d" ACCENT_BLUE = "#58a6ff" ACCENT_GREEN = "#3fb950" ACCENT_ORANGE = "#d29922" ACCENT_RED = "#f85149" TEXT_PRIMARY = "#e6edf3" TEXT_SECONDARY = "#8b949e" PROVIDER_COLORS = { "Ollama (local)": ACCENT_GREEN, "Claude (Anthropic)": ACCENT_BLUE, "Gemini (Google)": ACCENT_ORANGE, } CUSTOM_CSS = """ .dark { --body-background-fill: #0d1117 !important; --background-fill-primary: #161b22 !important; --background-fill-secondary: #0d1117 !important; --border-color-primary: #30363d !important; --block-label-text-color: #8b949e !important; --input-background-fill: #0d1117 !important; } .agent-trace { font-family: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', monospace; font-size: 13px; line-height: 1.6; } .metric-card { border: 1px solid #30363d; border-radius: 8px; padding: 16px; background: #161b22; } footer { display: none !important; } """ # --------------------------------------------------------------------------- # Tab 1: LLM Gateway -- Routing Logic # --------------------------------------------------------------------------- SIMPLE_PATTERNS: list[re.Pattern[str]] = [ 
re.compile(r"\b(hello|hi|hey|thanks|bye|yes|no|ok)\b", re.IGNORECASE), re.compile(r"\bwhat (time|day|date)\b", re.IGNORECASE), re.compile(r"\bdefine\s+\w+\b", re.IGNORECASE), re.compile(r"\btranslate\b", re.IGNORECASE), ] COMPLEX_KEYWORDS: list[str] = [ "analyze", "architecture", "compare", "design", "evaluate", "explain why", "implement", "optimize", "refactor", "review", "security", "trade-off", "tradeoff", "vulnerability", "debug", "performance", ] MOCK_RESPONSES: dict[str, str] = { "Ollama (local)": ( "This is a straightforward request. Here is the answer based on my " "local knowledge base, processed entirely on-device with zero latency " "to external APIs." ), "Claude (Anthropic)": ( "I have analyzed your request carefully. This requires nuanced reasoning " "across multiple dimensions. Let me walk through the key considerations " "and provide a structured response with concrete recommendations." ), "Gemini (Google)": ( "Based on my analysis, this is a moderately complex request. I can provide " "a detailed response drawing on broad knowledge while maintaining " "cost-efficiency compared to heavier models." 
), } @dataclass class RoutingDecision: provider: str reason: str latency_estimate_ms: int cost_estimate_usd: float decision_path: list[str] confidence: float def _compute_complexity_score(prompt: str) -> tuple[float, list[str]]: """Score prompt complexity on a 0-1 scale with an explanation trail.""" trail: list[str] = [] score = 0.0 # Length factor word_count = len(prompt.split()) length_score = min(word_count / 100.0, 1.0) * 0.3 score += length_score trail.append(f"Word count: {word_count} -> length factor: {length_score:.2f}") # Keyword factor -- scales with density of complex keywords keyword_hits = [kw for kw in COMPLEX_KEYWORDS if kw in prompt.lower()] keyword_score = min(len(keyword_hits) / 2.0, 1.0) * 0.5 score += keyword_score if keyword_hits: trail.append( f"Complex keywords found: [{', '.join(keyword_hits)}] " f"-> keyword factor: {keyword_score:.2f}" ) else: trail.append(f"No complex keywords -> keyword factor: 0.00") # Question depth (multiple questions imply complexity) question_count = prompt.count("?") question_score = min(question_count / 3.0, 1.0) * 0.15 score += question_score trail.append( f"Question marks: {question_count} -> question factor: {question_score:.2f}" ) # Code/technical markers code_markers = ["```", "def ", "class ", "function ", "SELECT ", "CREATE ", "import "] code_hits = [m for m in code_markers if m in prompt] code_score = min(len(code_hits) / 2.0, 1.0) * 0.15 score += code_score if code_hits: trail.append( f"Code markers: [{', '.join(code_hits)}] -> code factor: {code_score:.2f}" ) else: trail.append(f"No code markers -> code factor: 0.00") trail.append(f"Final complexity score: {score:.3f}") return min(score, 1.0), trail def route_prompt(prompt: str, complexity_override: str) -> RoutingDecision: """Determine which provider should handle this prompt.""" decision_path: list[str] = [] # Step 1: Check for regex-matched simple patterns decision_path.append("[1] Checking regex rules for trivial patterns...") if complexity_override 
== "simple": decision_path.append(" User override: complexity=simple") elif complexity_override == "complex": decision_path.append(" User override: complexity=complex") else: for pattern in SIMPLE_PATTERNS: if pattern.search(prompt): decision_path.append(f" Matched pattern: {pattern.pattern}") decision_path.append(" -> Route to Ollama (local, fast, free)") return RoutingDecision( provider="Ollama (local)", reason="Matched simple-pattern regex rule", latency_estimate_ms=random.randint(80, 250), cost_estimate_usd=0.0, decision_path=decision_path, confidence=0.95, ) decision_path.append(" No regex match") # Step 2: Complexity scoring decision_path.append("[2] Running complexity scorer...") score, trail = _compute_complexity_score(prompt) decision_path.extend(f" {line}" for line in trail) # Apply override if complexity_override == "simple": score = min(score, 0.2) decision_path.append(" Override applied: clamped score to <= 0.2") elif complexity_override == "complex": score = max(score, 0.7) decision_path.append(" Override applied: raised score to >= 0.7") # Step 3: Provider selection based on tier thresholds OLLAMA_CEILING = 0.25 GEMINI_CEILING = 0.55 decision_path.append( f"[3] Selecting provider (thresholds: " f"Ollama <{OLLAMA_CEILING}, Gemini <{GEMINI_CEILING}, Claude >={GEMINI_CEILING})..." 
) if score < OLLAMA_CEILING: provider = "Ollama (local)" latency = random.randint(80, 300) cost = 0.0 decision_path.append(f" Score {score:.3f} < {OLLAMA_CEILING} -> Ollama (local)") elif score < GEMINI_CEILING: provider = "Gemini (Google)" latency = random.randint(400, 1200) cost = round(random.uniform(0.001, 0.008), 4) decision_path.append( f" Score {OLLAMA_CEILING} <= {score:.3f} < {GEMINI_CEILING} -> Gemini (Google)" ) else: provider = "Claude (Anthropic)" latency = random.randint(800, 2500) cost = round(random.uniform(0.005, 0.025), 4) decision_path.append(f" Score {score:.3f} >= {GEMINI_CEILING} -> Claude (Anthropic)") # Step 4: Circuit breaker check (simulated -- always healthy in demo) decision_path.append("[4] Circuit breaker check: all providers HEALTHY") decision_path.append(f"[5] Final decision: {provider}") return RoutingDecision( provider=provider, reason=f"Complexity score {score:.3f} routed to {provider}", latency_estimate_ms=latency, cost_estimate_usd=cost, decision_path=decision_path, confidence=round(0.7 + score * 0.25, 2), ) def handle_gateway_request( prompt: str, complexity: str ) -> tuple[str, str, str, str, str]: """Process a gateway routing request. Returns 5 strings for the UI outputs.""" if not prompt or not prompt.strip(): blank = "Enter a prompt above to see routing in action." 
return blank, "", "", "", "" decision = route_prompt(prompt.strip(), complexity.lower()) # Provider badge provider_display = ( f"**{decision.provider}**\n\n" f"Confidence: {decision.confidence:.0%}" ) # Metrics metrics_display = ( f"**Estimated Latency:** {decision.latency_estimate_ms} ms\n\n" f"**Estimated Cost:** ${decision.cost_estimate_usd:.4f}\n\n" f"**Reason:** {decision.reason}" ) # Decision tree tree_display = "```\n" + "\n".join(decision.decision_path) + "\n```" # Mock response mock_response = ( f"**[{decision.provider}]** (simulated)\n\n" f"{MOCK_RESPONSES[decision.provider]}" ) # Cache status (simulate) cache_hash = hashlib.md5(prompt.encode()).hexdigest()[:12] cache_display = ( f"**Cache Key:** `{cache_hash}`\n\n" f"**Cache Status:** MISS (first request)\n\n" f"**TTL:** 3600s\n\n" f"Subsequent identical prompts would return cached response " f"with <5ms latency and $0.00 cost." ) return provider_display, metrics_display, tree_display, mock_response, cache_display # --------------------------------------------------------------------------- # Tab 2: HNSW Vector Search # --------------------------------------------------------------------------- SENTENCES: list[str] = [ "Neural networks learn hierarchical representations of data", "Transformers use self-attention mechanisms for sequence modeling", "Gradient descent optimizes model parameters iteratively", "Convolutional neural networks excel at image recognition tasks", "Recurrent neural networks process sequential data with memory", "Generative adversarial networks create realistic synthetic data", "Transfer learning reuses pretrained models for new tasks", "Reinforcement learning agents learn through reward signals", "Natural language processing enables machines to understand text", "Computer vision systems interpret and analyze visual information", "Embeddings represent discrete objects as continuous vectors", "Attention mechanisms allow models to focus on relevant inputs", "Backpropagation computes 
gradients through computational graphs", "Batch normalization stabilizes and accelerates neural network training", "Dropout regularization prevents overfitting in deep networks", "Learning rate scheduling improves convergence during training", "Data augmentation increases training set diversity artificially", "Hyperparameter tuning optimizes model configuration systematically", "Cross-validation estimates model performance on unseen data", "Feature engineering transforms raw data into informative representations", "Dimensionality reduction compresses high-dimensional data efficiently", "Clustering algorithms group similar data points together", "Decision trees split data based on feature thresholds", "Random forests combine multiple decision trees for robustness", "Support vector machines find optimal separating hyperplanes", "K-nearest neighbors classifies based on proximity in feature space", "Principal component analysis finds directions of maximum variance", "Autoencoders learn compressed representations through reconstruction", "Variational autoencoders generate new samples from learned distributions", "Graph neural networks operate on graph-structured data", "Federated learning trains models across decentralized data sources", "Differential privacy adds noise to protect individual data points", "Model distillation compresses large models into smaller ones", "Quantization reduces model size by lowering numerical precision", "Pruning removes unnecessary weights from neural networks", "ONNX provides a standard format for neural network interchange", "TensorRT optimizes models for inference on NVIDIA hardware", "Vector databases enable efficient similarity search at scale", "Retrieval augmented generation combines search with language models", "Prompt engineering designs effective inputs for language models", "Fine-tuning adapts pretrained models to specific domains", "RLHF aligns language models with human preferences", "Chain of thought prompting improves 
reasoning in language models", "Multi-modal models process text images and audio together", "Tokenization converts text into numerical sequences for models", "Beam search explores multiple decoding paths simultaneously", "Temperature scaling controls randomness in model generation", "Contrastive learning trains models by comparing positive and negative pairs", "Self-supervised learning extracts labels from the data itself", "Few-shot learning enables models to learn from minimal examples", ] def _deterministic_embed(text: str, dim: int = 64) -> np.ndarray: """Generate a deterministic pseudo-embedding from text content. Uses character-level hashing with trigram overlap to produce vectors where semantically similar sentences get closer embeddings. This is not a real language model -- it is a reproducible approximation that makes the demo meaningful without any ML dependencies. """ rng = np.random.RandomState( int(hashlib.sha256(text.lower().encode()).hexdigest(), 16) % (2**31) ) base = rng.randn(dim).astype(np.float32) # Add trigram-based signal so overlapping words produce closer vectors words = text.lower().split() for word in words: word_seed = int(hashlib.md5(word.encode()).hexdigest(), 16) % (2**31) word_rng = np.random.RandomState(word_seed) base += word_rng.randn(dim).astype(np.float32) * 0.3 norm = np.linalg.norm(base) if norm > 0: base /= norm return base # Pre-compute corpus embeddings at module load _CORPUS_EMBEDDINGS: np.ndarray = np.stack( [_deterministic_embed(s) for s in SENTENCES] ) def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: dot = float(np.dot(a, b)) norm_a = float(np.linalg.norm(a)) norm_b = float(np.linalg.norm(b)) if norm_a == 0 or norm_b == 0: return 0.0 return dot / (norm_a * norm_b) def _search_vectors( query: str, top_k: int = 8 ) -> list[tuple[str, float, int]]: """Search the corpus and return (sentence, similarity, index) tuples.""" query_vec = _deterministic_embed(query) similarities = _CORPUS_EMBEDDINGS @ query_vec 
def _build_vector_plot(
    query: str, results: list[tuple[str, float, int]]
) -> plt.Figure:
    """Build a 2D PCA-like projection of the vector space highlighting results.

    Uses a fixed (seed 42) random projection rather than true PCA so the
    layout is stable across calls.
    """
    rng = np.random.RandomState(42)
    projection_matrix = rng.randn(64, 2).astype(np.float32)
    projection_matrix /= np.linalg.norm(projection_matrix, axis=0, keepdims=True)

    all_2d = _CORPUS_EMBEDDINGS @ projection_matrix
    query_vec = _deterministic_embed(query)
    query_2d = query_vec @ projection_matrix
    result_indices = {r[2] for r in results}

    fig, ax = plt.subplots(figsize=(8, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(DARK_SURFACE)

    # Plot all non-result corpus points as a dim background layer.
    non_result_mask = np.array(
        [i not in result_indices for i in range(len(SENTENCES))]
    )
    ax.scatter(
        all_2d[non_result_mask, 0],
        all_2d[non_result_mask, 1],
        c=TEXT_SECONDARY,
        alpha=0.3,
        s=20,
        label="Corpus",
    )

    # Plot result points. Cosine similarity lies in [-1, 1], so alpha and
    # marker size are clamped: matplotlib raises ValueError for alpha
    # outside [0, 1] and for negative marker sizes.
    for sentence, sim, idx in results:
        ax.scatter(
            all_2d[idx, 0],
            all_2d[idx, 1],
            c=ACCENT_BLUE,
            alpha=min(1.0, max(0.4, sim)),
            s=60 + max(0.0, sim) * 80,
            zorder=5,
        )
        # Draw line from query to result
        ax.plot(
            [query_2d[0], all_2d[idx, 0]],
            [query_2d[1], all_2d[idx, 1]],
            color=ACCENT_BLUE,
            alpha=min(1.0, max(0.05, 0.15 + sim * 0.3)),
            linewidth=0.8,
        )

    # Plot query point
    ax.scatter(
        query_2d[0],
        query_2d[1],
        c=ACCENT_RED,
        s=120,
        marker="*",
        zorder=10,
        label="Query",
    )
    ax.set_title(
        "Vector Space Projection (2D)",
        color=TEXT_PRIMARY,
        fontsize=13,
        fontweight="bold",
        pad=12,
    )
    ax.tick_params(colors=TEXT_SECONDARY, labelsize=8)
    for spine in ax.spines.values():
        spine.set_color(DARK_BORDER)
    ax.legend(
        facecolor=DARK_SURFACE,
        edgecolor=DARK_BORDER,
        labelcolor=TEXT_PRIMARY,
        fontsize=9,
    )
    fig.tight_layout()
    return fig


def handle_vector_search(query: str) -> tuple[str, Any]:
    """Run vector search and return (results markdown, matplotlib figure).

    An empty or whitespace-only query returns a placeholder message and no
    figure.
    """
    if not query or not query.strip():
        return "Enter a query to search the vector corpus.", None

    start = time.perf_counter()
    results = _search_vectors(query.strip())
    elapsed_us = (time.perf_counter() - start) * 1_000_000

    lines = [
        f"**Query:** {query.strip()}\n",
        f"**Search time:** {elapsed_us:.0f} us | "
        f"**Corpus size:** {len(SENTENCES)} sentences | "
        f"**Embedding dim:** 64\n",
        "---\n",
        "| Rank | Similarity | Sentence |",
        "|------|-----------|----------|",
    ]
    for rank, (sentence, sim, _idx) in enumerate(results, 1):
        # Clamp so a negative similarity cannot produce a malformed bar.
        bar_length = max(0, min(20, int(sim * 20)))
        bar = "+" * bar_length + "-" * (20 - bar_length)
        lines.append(f"| {rank} | `{sim:.4f}` [{bar}] | {sentence} |")
    lines.append("\n---\n")
    lines.append(
        "*Embeddings are deterministic pseudo-vectors (word-level hashing). "
        "In production, Citadel uses real transformer embeddings with "
        "a custom HNSW index supporting millions of vectors.*"
    )
    fig = _build_vector_plot(query.strip(), results)
    return "\n".join(lines), fig
plot.""" if not query or not query.strip(): return "Enter a query to search the vector corpus.", None start = time.perf_counter() results = _search_vectors(query.strip()) elapsed_us = (time.perf_counter() - start) * 1_000_000 lines = [ f"**Query:** {query.strip()}\n", f"**Search time:** {elapsed_us:.0f} us | " f"**Corpus size:** {len(SENTENCES)} sentences | " f"**Embedding dim:** 64\n", "---\n", "| Rank | Similarity | Sentence |", "|------|-----------|----------|", ] for rank, (sentence, sim, _idx) in enumerate(results, 1): bar_length = int(sim * 20) bar = "+" * bar_length + "-" * (20 - bar_length) lines.append(f"| {rank} | `{sim:.4f}` [{bar}] | {sentence} |") lines.append("\n---\n") lines.append( "*Embeddings are deterministic pseudo-vectors (trigram hashing). " "In production, Citadel uses real transformer embeddings with " "a custom HNSW index supporting millions of vectors.*" ) fig = _build_vector_plot(query.strip(), results) return "\n".join(lines), fig # --------------------------------------------------------------------------- # Tab 3: ReAct Agent Runtime # --------------------------------------------------------------------------- AGENT_SCENARIOS: dict[str, list[dict[str, str]]] = { "research": [ { "thought": ( "The user wants me to research a topic. I should start by " "searching for relevant information, then synthesize findings." ), "action": "search_knowledge_base", "action_input": '{"query": "", "top_k": 5}', "observation": ( "Found 5 relevant documents:\n" " [1] Overview and fundamentals (relevance: 0.92)\n" " [2] Recent developments in 2025 (relevance: 0.87)\n" " [3] Comparison with alternative approaches (relevance: 0.81)\n" " [4] Implementation best practices (relevance: 0.76)\n" " [5] Common pitfalls and solutions (relevance: 0.71)" ), }, { "thought": ( "I have foundational documents. Let me check for any recent " "updates or corrections to ensure accuracy." 
), "action": "web_search", "action_input": '{"query": " latest developments 2026"}', "observation": ( "3 recent results found:\n" " [1] New benchmark results published March 2026\n" " [2] Industry adoption report Q1 2026\n" " [3] Critical vulnerability patch released Feb 2026" ), }, { "thought": ( "I now have both historical context and recent updates. " "I can synthesize a comprehensive answer. The vulnerability " "patch in result [3] is important context to include." ), "action": "FINAL_ANSWER", "action_input": "", "observation": "", }, ], "code": [ { "thought": ( "The user needs code assistance. I should first understand " "the codebase structure before making changes." ), "action": "read_file", "action_input": '{"path": "src/main.py"}', "observation": ( "File contents (247 lines):\n" " - Class: ApplicationServer (lines 12-180)\n" " - Function: configure_routes (lines 182-220)\n" " - Function: main (lines 222-247)\n" " Dependencies: fastapi, uvicorn, pydantic" ), }, { "thought": ( "I see the structure. The ApplicationServer class handles " "routing. I need to check the test file to understand " "expected behavior before modifying anything." ), "action": "read_file", "action_input": '{"path": "tests/test_main.py"}', "observation": ( "Test file (89 lines):\n" " - test_server_startup: verifies port binding\n" " - test_health_endpoint: checks /health returns 200\n" " - test_graceful_shutdown: ensures cleanup on SIGTERM\n" " All 3 tests currently passing" ), }, { "thought": ( "I understand the codebase and test expectations. I can now " "implement the requested change while maintaining backward " "compatibility with existing tests." ), "action": "write_file", "action_input": '{"path": "src/main.py", "changes": "..."}', "observation": "File updated successfully. Running tests...\n 3/3 tests passing. No regressions.", }, { "thought": "The change is implemented and all tests pass. 
I can provide the final answer with a summary of what changed.", "action": "FINAL_ANSWER", "action_input": "", "observation": "", }, ], "analysis": [ { "thought": ( "The user wants data analysis. I should first retrieve the " "relevant dataset and understand its schema." ), "action": "query_database", "action_input": '{"sql": "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = \'metrics\'"}', "observation": ( "Schema for 'metrics' table:\n" " - timestamp: DATETIME (indexed)\n" " - provider: VARCHAR(50)\n" " - latency_ms: FLOAT\n" " - tokens_used: INTEGER\n" " - cost_usd: DECIMAL(10,6)\n" " - status_code: INTEGER\n" " Total rows: 142,847" ), }, { "thought": ( "Good schema understanding. Now I need to run the analytical " "query the user is interested in. Let me aggregate by provider." ), "action": "query_database", "action_input": ( '{"sql": "SELECT provider, COUNT(*) as requests, ' "AVG(latency_ms) as avg_latency, SUM(cost_usd) as total_cost " "FROM metrics WHERE timestamp > '2026-02-01' " 'GROUP BY provider ORDER BY requests DESC"}' ), "observation": ( "Results:\n" " | Provider | Requests | Avg Latency | Total Cost |\n" " |----------|----------|-------------|------------|\n" " | Ollama | 89,234 | 142ms | $0.00 |\n" " | Gemini | 38,991 | 687ms | $12.47 |\n" " | Claude | 14,622 | 1,284ms | $48.93 |" ), }, { "thought": ( "Clear picture: Ollama handles the bulk of requests at zero " "cost, Gemini is the middle tier, and Claude handles complex " "queries at higher cost but lower volume. The routing strategy " "is working as designed. I can now present findings." 
), "action": "FINAL_ANSWER", "action_input": "", "observation": "", }, ], } def _select_scenario(task: str) -> tuple[str, list[dict[str, str]]]: """Pick the most relevant scenario based on task keywords.""" task_lower = task.lower() if any(kw in task_lower for kw in ["code", "implement", "fix", "bug", "refactor", "function"]): return "code", AGENT_SCENARIOS["code"] if any(kw in task_lower for kw in ["data", "analyz", "metric", "query", "report", "stats"]): return "analysis", AGENT_SCENARIOS["analysis"] return "research", AGENT_SCENARIOS["research"] def handle_agent_task(task: str) -> str: """Generate a ReAct agent trace for the given task.""" if not task or not task.strip(): return "Enter a task above to see the ReAct agent reasoning loop." scenario_type, steps = _select_scenario(task.strip()) topic = task.strip() lines: list[str] = [ f"## ReAct Agent Trace\n", f"**Task:** {topic}\n", f"**Scenario type:** {scenario_type}\n", f"**Registered tools:** search_knowledge_base, web_search, read_file, " f"write_file, query_database, execute_code\n", "---\n", ] for step_num, step in enumerate(steps, 1): thought = step["thought"].replace("", topic) action = step["action"] action_input = step["action_input"].replace("", topic) observation = step["observation"].replace("", topic) lines.append(f"### Step {step_num}\n") lines.append(f"**Thought:** {thought}\n") if action == "FINAL_ANSWER": lines.append("**Action:** `FINAL_ANSWER`\n") lines.append( f"**Result:** Based on the information gathered across " f"{step_num - 1} tool invocations, I have synthesized a " f"comprehensive response to the user's request regarding " f"*{topic}*.\n" ) else: lines.append(f"**Action:** `{action}({action_input})`\n") lines.append(f"**Observation:**\n```\n{observation}\n```\n") lines.append("---\n") lines.append( f"**Agent completed in {len(steps)} steps " f"({len(steps) - 1} tool calls + final answer)**\n\n" f"*In production, Citadel's agent runtime executes real tool calls " f"with timeout 
handling, retry logic, and full observability tracing.*" ) return "\n".join(lines) # --------------------------------------------------------------------------- # Tab 4: Observability Dashboard # --------------------------------------------------------------------------- def _generate_timeseries( hours: int = 24, base_rate: float = 50.0, noise: float = 15.0, trend: float = 0.5, ) -> tuple[list[str], list[float]]: """Generate realistic-looking time series data.""" rng = random.Random(42) timestamps = [] values = [] for h in range(hours): # Simulate daily pattern: lower at night, higher during day hour_of_day = h % 24 daily_factor = 0.5 + 0.5 * math.sin((hour_of_day - 6) * math.pi / 12) value = base_rate * daily_factor + trend * h + rng.gauss(0, noise) timestamps.append(f"{h:02d}:00") values.append(max(0, value)) return timestamps, values def build_observability_dashboard() -> tuple[Any, str]: """Build the observability charts and metrics summary.""" rng = random.Random(42) # Generate data hours = 24 timestamps, rps_values = _generate_timeseries(hours, 45, 12, 0.3) p50_latencies = [80 + rng.gauss(0, 15) + 20 * math.sin(i * 0.3) for i in range(hours)] p95_latencies = [lat * (2.5 + rng.gauss(0, 0.3)) for lat in p50_latencies] provider_requests = {"Ollama": 62_340, "Gemini": 27_891, "Claude": 10_244} provider_costs = {"Ollama": 0.0, "Gemini": 8.94, "Claude": 34.21} provider_errors = {"Ollama": 12, "Gemini": 47, "Claude": 8} token_usage = { "Prompt tokens": 2_847_291, "Completion tokens": 1_423_886, "Cached tokens": 891_204, } # Build plotly figure with subplots # Bottom-right cell is "domain" type to support Pie chart fig = make_subplots( rows=2, cols=2, subplot_titles=( "Requests per Second (24h)", "Latency Distribution (p50 / p95)", "Cost by Provider", "Token Usage Breakdown", ), specs=[ [{"type": "xy"}, {"type": "xy"}], [{"type": "xy"}, {"type": "domain"}], ], vertical_spacing=0.14, horizontal_spacing=0.10, ) # Chart 1: RPS time series fig.add_trace( go.Scatter( 
x=timestamps, y=rps_values, mode="lines", name="req/s", line=dict(color=ACCENT_BLUE, width=2), fill="tozeroy", fillcolor="rgba(88, 166, 255, 0.1)", ), row=1, col=1, ) # Chart 2: Latency fig.add_trace( go.Scatter( x=timestamps, y=p50_latencies, mode="lines", name="p50", line=dict(color=ACCENT_GREEN, width=2), ), row=1, col=2, ) fig.add_trace( go.Scatter( x=timestamps, y=p95_latencies, mode="lines", name="p95", line=dict(color=ACCENT_ORANGE, width=2), ), row=1, col=2, ) # Chart 3: Cost by provider (bar) providers = list(provider_costs.keys()) costs = list(provider_costs.values()) colors = [ACCENT_GREEN, ACCENT_ORANGE, ACCENT_BLUE] fig.add_trace( go.Bar( x=providers, y=costs, name="Cost ($)", marker_color=colors, text=[f"${c:.2f}" for c in costs], textposition="outside", textfont=dict(color=TEXT_PRIMARY), ), row=2, col=1, ) # Chart 4: Token usage (pie) fig.add_trace( go.Pie( labels=list(token_usage.keys()), values=list(token_usage.values()), marker=dict(colors=[ACCENT_BLUE, ACCENT_GREEN, ACCENT_ORANGE]), textinfo="label+percent", textfont=dict(color=TEXT_PRIMARY, size=11), hole=0.4, ), row=2, col=2, ) # Style fig.update_layout( height=620, paper_bgcolor=DARK_BG, plot_bgcolor=DARK_SURFACE, font=dict(color=TEXT_PRIMARY, size=11), showlegend=True, legend=dict( bgcolor=DARK_SURFACE, bordercolor=DARK_BORDER, font=dict(color=TEXT_PRIMARY), ), margin=dict(t=40, b=30, l=50, r=30), ) for annotation in fig.layout.annotations: annotation.font = dict(color=TEXT_PRIMARY, size=12) # Style only the XY subplot axes (bottom-right is domain type, no axes) for axis_name in ["xaxis", "xaxis2", "xaxis3"]: fig.layout[axis_name].gridcolor = DARK_BORDER fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY) for axis_name in ["yaxis", "yaxis2", "yaxis3"]: fig.layout[axis_name].gridcolor = DARK_BORDER fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY) # Metrics summary total_requests = sum(provider_requests.values()) total_cost = sum(provider_costs.values()) total_errors = 
sum(provider_errors.values()) error_rate = total_errors / total_requests * 100 avg_p50 = sum(p50_latencies) / len(p50_latencies) avg_p95 = sum(p95_latencies) / len(p95_latencies) summary_lines = [ "## Summary Metrics (24h window)\n", "| Metric | Value |", "|--------|-------|", f"| Total requests | {total_requests:,} |", f"| Avg requests/sec | {total_requests / 86400:.1f} |", f"| p50 latency | {avg_p50:.0f} ms |", f"| p95 latency | {avg_p95:.0f} ms |", f"| Total cost | ${total_cost:.2f} |", f"| Cost per request | ${total_cost / total_requests:.6f} |", f"| Total errors | {total_errors:,} |", f"| Error rate | {error_rate:.3f}% |", f"| Total tokens | {sum(token_usage.values()):,} |", f"| Cache hit rate | {token_usage['Cached tokens'] / sum(token_usage.values()) * 100:.1f}% |", "\n---\n", "### Provider Breakdown\n", "| Provider | Requests | Cost | Errors | Error Rate |", "|----------|----------|------|--------|------------|", ] for provider in providers: req = provider_requests[provider] cost = provider_costs[provider] err = provider_errors[provider] erate = err / req * 100 summary_lines.append( f"| {provider} | {req:,} | ${cost:.2f} | {err} | {erate:.3f}% |" ) summary_lines.append( "\n*Metrics are simulated for demonstration. 
# ---------------------------------------------------------------------------
# Gradio Application
# ---------------------------------------------------------------------------


def build_app() -> gr.Blocks:
    """Construct the Gradio Blocks application.

    Wires the four demo tabs (gateway, vector search, agent, observability)
    to their handler functions. All handlers are pure functions over their
    inputs, so the app holds no session state.
    """
    with gr.Blocks(
        title="Citadel -- AI Operations Platform",
        # Dark theme built on the Base theme; the .set(...) overrides pin the
        # GitHub-dark palette for both light and dark browser preferences.
        theme=gr.themes.Base(
            primary_hue=gr.themes.colors.blue,
            secondary_hue=gr.themes.colors.gray,
            neutral_hue=gr.themes.colors.gray,
            font=gr.themes.GoogleFont("Inter"),
            font_mono=gr.themes.GoogleFont("JetBrains Mono"),
        ).set(
            body_background_fill=DARK_BG,
            body_background_fill_dark=DARK_BG,
            block_background_fill=DARK_SURFACE,
            block_background_fill_dark=DARK_SURFACE,
            block_border_color=DARK_BORDER,
            block_border_color_dark=DARK_BORDER,
            input_background_fill="#0d1117",
            input_background_fill_dark="#0d1117",
            button_primary_background_fill=ACCENT_BLUE,
            button_primary_background_fill_dark=ACCENT_BLUE,
            button_primary_text_color="#ffffff",
            button_primary_text_color_dark="#ffffff",
        ),
        css=CUSTOM_CSS,
    ) as app:
        # Page header.
        gr.Markdown(
            f"""
            # Citadel -- AI Operations Platform
            Production-grade AI infrastructure built from first principles.
            LLM gateway | Vector search | Agent runtime | Observability
            [GitHub](https://github.com/dbhavery/citadel)
            """,
        )

        # Tab 1: LLM Gateway
        with gr.Tab("LLM Gateway"):
            gr.Markdown(
                "### Multi-Provider Routing Engine\n"
                "Enter a prompt and see how Citadel's gateway routes it to the "
                "optimal provider based on complexity analysis, regex rules, "
                "and cost/latency trade-offs."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    gateway_input = gr.Textbox(
                        label="Prompt",
                        placeholder="Try: 'Hello' (simple) or 'Analyze the security implications of...' (complex)",
                        lines=3,
                    )
                with gr.Column(scale=1):
                    complexity_selector = gr.Radio(
                        choices=["Auto", "Simple", "Moderate", "Complex"],
                        value="Auto",
                        label="Complexity Override",
                    )
            gateway_btn = gr.Button("Route Request", variant="primary")
            with gr.Row():
                provider_output = gr.Markdown(label="Selected Provider")
                metrics_output = gr.Markdown(label="Routing Metrics")
                cache_output = gr.Markdown(label="Cache Layer")
            with gr.Row():
                with gr.Column(scale=1):
                    tree_output = gr.Markdown(label="Decision Path")
                with gr.Column(scale=1):
                    response_output = gr.Markdown(label="Mock Response")
            gateway_btn.click(
                fn=handle_gateway_request,
                inputs=[gateway_input, complexity_selector],
                outputs=[
                    provider_output,
                    metrics_output,
                    tree_output,
                    response_output,
                    cache_output,
                ],
            )
            # Also trigger on Enter
            gateway_input.submit(
                fn=handle_gateway_request,
                inputs=[gateway_input, complexity_selector],
                outputs=[
                    provider_output,
                    metrics_output,
                    tree_output,
                    response_output,
                    cache_output,
                ],
            )

        # Tab 2: Vector Search
        with gr.Tab("HNSW Vector Search"):
            gr.Markdown(
                "### Nearest Neighbor Search\n"
                "Search a corpus of 50 AI/ML sentences using cosine similarity. "
                "The HNSW index in production supports millions of vectors with "
                "sub-millisecond lookup. This demo uses a simplified "
                "embedding model for illustration."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    vector_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Try: 'how do transformers work' or 'reducing model size'",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    vector_btn = gr.Button("Search Vectors", variant="primary")
            vector_results = gr.Markdown(label="Search Results")
            vector_plot = gr.Plot(label="Vector Space Visualization")
            vector_btn.click(
                fn=handle_vector_search,
                inputs=[vector_input],
                outputs=[vector_results, vector_plot],
            )
            vector_input.submit(
                fn=handle_vector_search,
                inputs=[vector_input],
                outputs=[vector_results, vector_plot],
            )

        # Tab 3: Agent Runtime
        with gr.Tab("Agent Runtime"):
            gr.Markdown(
                "### ReAct Agent Reasoning Loop\n"
                "Enter a task and see how Citadel's agent runtime decomposes "
                "it into a Thought-Action-Observation cycle. The agent selects "
                "tools, processes results, and builds toward a final answer."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    agent_input = gr.Textbox(
                        label="Task",
                        placeholder="Try: 'Research vector databases' or 'Fix the login bug' or 'Analyze API latency trends'",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    agent_btn = gr.Button("Run Agent", variant="primary")
            # .agent-trace CSS class applies monospace styling (see CUSTOM_CSS).
            agent_output = gr.Markdown(
                label="Agent Trace",
                elem_classes=["agent-trace"],
            )
            agent_btn.click(
                fn=handle_agent_task,
                inputs=[agent_input],
                outputs=[agent_output],
            )
            agent_input.submit(
                fn=handle_agent_task,
                inputs=[agent_input],
                outputs=[agent_output],
            )

        # Tab 4: Observability
        with gr.Tab("Observability"):
            gr.Markdown(
                "### Operations Dashboard\n"
                "Real-time monitoring of the Citadel platform. Request rates, "
                "latency percentiles, cost tracking, and token usage -- "
                "everything you need to operate an AI system in production."
            )
            refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
            obs_plot = gr.Plot(label="Dashboard Charts")
            obs_summary = gr.Markdown(label="Metrics Summary")
            # Load on page open (data is seeded, so Refresh yields the same
            # charts -- it exists to demonstrate the wiring).
            app.load(
                fn=build_observability_dashboard,
                outputs=[obs_plot, obs_summary],
            )
            refresh_btn.click(
                fn=build_observability_dashboard,
                outputs=[obs_plot, obs_summary],
            )
    return app


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    application = build_app()
    # 0.0.0.0 binds all interfaces -- intended for container/Spaces
    # deployment where the port is exposed by the platform.
    application.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )