# NOTE: Hugging Face Spaces page header residue from extraction; not part of the program.
| """ | |
| Citadel Explorer -- Interactive demo for the Citadel AI Operations Platform. | |
| Showcases LLM gateway routing, HNSW vector search, ReAct agent traces, | |
| and observability dashboards. All demos use mock data; no API keys required. | |
| Source: https://github.com/dbhavery/citadel | |
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import math | |
| import random | |
| import re | |
| import time | |
| from dataclasses import dataclass, field | |
| from typing import Any | |
| import gradio as gr | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| matplotlib.use("Agg") | |
| # --------------------------------------------------------------------------- | |
| # Shared constants | |
| # --------------------------------------------------------------------------- | |
# GitHub-dark inspired palette shared by all tabs (charts, CSS, plots).
DARK_BG = "#0d1117"  # page background
DARK_SURFACE = "#161b22"  # card / panel background
DARK_BORDER = "#30363d"  # hairline borders
ACCENT_BLUE = "#58a6ff"
ACCENT_GREEN = "#3fb950"
ACCENT_ORANGE = "#d29922"
ACCENT_RED = "#f85149"
TEXT_PRIMARY = "#e6edf3"
TEXT_SECONDARY = "#8b949e"
# Stable per-provider accent colors; keys match the provider names used by
# the routing logic and MOCK_RESPONSES.
PROVIDER_COLORS = {
    "Ollama (local)": ACCENT_GREEN,
    "Claude (Anthropic)": ACCENT_BLUE,
    "Gemini (Google)": ACCENT_ORANGE,
}
# CSS overrides injected into the Gradio app: dark-theme variables, a
# monospace style for agent traces, metric-card framing, and hiding the
# default Gradio footer.
CUSTOM_CSS = """
.dark {
    --body-background-fill: #0d1117 !important;
    --background-fill-primary: #161b22 !important;
    --background-fill-secondary: #0d1117 !important;
    --border-color-primary: #30363d !important;
    --block-label-text-color: #8b949e !important;
    --input-background-fill: #0d1117 !important;
}
.agent-trace {
    font-family: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', monospace;
    font-size: 13px;
    line-height: 1.6;
}
.metric-card {
    border: 1px solid #30363d;
    border-radius: 8px;
    padding: 16px;
    background: #161b22;
}
footer { display: none !important; }
"""
| # --------------------------------------------------------------------------- | |
| # Tab 1: LLM Gateway -- Routing Logic | |
| # --------------------------------------------------------------------------- | |
# Regex fast-path: a prompt matching any of these is considered trivial and
# is routed straight to the local model by route_prompt.
SIMPLE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\b(hello|hi|hey|thanks|bye|yes|no|ok)\b", re.IGNORECASE),
    re.compile(r"\bwhat (time|day|date)\b", re.IGNORECASE),
    re.compile(r"\bdefine\s+\w+\b", re.IGNORECASE),
    re.compile(r"\btranslate\b", re.IGNORECASE),
]
# Substrings that raise the complexity score; matched against the
# lowercased prompt in _compute_complexity_score.
COMPLEX_KEYWORDS: list[str] = [
    "analyze", "architecture", "compare", "design", "evaluate", "explain why",
    "implement", "optimize", "refactor", "review", "security", "trade-off",
    "tradeoff", "vulnerability", "debug", "performance",
]
# Canned per-provider answers shown in place of real LLM calls; keyed by the
# same provider names route_prompt returns.
MOCK_RESPONSES: dict[str, str] = {
    "Ollama (local)": (
        "This is a straightforward request. Here is the answer based on my "
        "local knowledge base, processed entirely on-device with zero latency "
        "to external APIs."
    ),
    "Claude (Anthropic)": (
        "I have analyzed your request carefully. This requires nuanced reasoning "
        "across multiple dimensions. Let me walk through the key considerations "
        "and provide a structured response with concrete recommendations."
    ),
    "Gemini (Google)": (
        "Based on my analysis, this is a moderately complex request. I can provide "
        "a detailed response drawing on broad knowledge while maintaining "
        "cost-efficiency compared to heavier models."
    ),
}
@dataclass
class RoutingDecision:
    """Outcome of routing one prompt through the gateway.

    Bug fix: the class previously had only bare annotations (no
    ``@dataclass``), so keyword-argument construction — as done in
    ``route_prompt`` — would raise ``TypeError``. ``dataclass`` is already
    imported at module top.
    """

    provider: str  # display name, e.g. "Ollama (local)"
    reason: str  # one-line routing rationale
    latency_estimate_ms: int
    cost_estimate_usd: float
    decision_path: list[str]  # human-readable step-by-step trail
    confidence: float  # 0..1 routing confidence
| def _compute_complexity_score(prompt: str) -> tuple[float, list[str]]: | |
| """Score prompt complexity on a 0-1 scale with an explanation trail.""" | |
| trail: list[str] = [] | |
| score = 0.0 | |
| # Length factor | |
| word_count = len(prompt.split()) | |
| length_score = min(word_count / 100.0, 1.0) * 0.3 | |
| score += length_score | |
| trail.append(f"Word count: {word_count} -> length factor: {length_score:.2f}") | |
| # Keyword factor -- scales with density of complex keywords | |
| keyword_hits = [kw for kw in COMPLEX_KEYWORDS if kw in prompt.lower()] | |
| keyword_score = min(len(keyword_hits) / 2.0, 1.0) * 0.5 | |
| score += keyword_score | |
| if keyword_hits: | |
| trail.append( | |
| f"Complex keywords found: [{', '.join(keyword_hits)}] " | |
| f"-> keyword factor: {keyword_score:.2f}" | |
| ) | |
| else: | |
| trail.append(f"No complex keywords -> keyword factor: 0.00") | |
| # Question depth (multiple questions imply complexity) | |
| question_count = prompt.count("?") | |
| question_score = min(question_count / 3.0, 1.0) * 0.15 | |
| score += question_score | |
| trail.append( | |
| f"Question marks: {question_count} -> question factor: {question_score:.2f}" | |
| ) | |
| # Code/technical markers | |
| code_markers = ["```", "def ", "class ", "function ", "SELECT ", "CREATE ", "import "] | |
| code_hits = [m for m in code_markers if m in prompt] | |
| code_score = min(len(code_hits) / 2.0, 1.0) * 0.15 | |
| score += code_score | |
| if code_hits: | |
| trail.append( | |
| f"Code markers: [{', '.join(code_hits)}] -> code factor: {code_score:.2f}" | |
| ) | |
| else: | |
| trail.append(f"No code markers -> code factor: 0.00") | |
| trail.append(f"Final complexity score: {score:.3f}") | |
| return min(score, 1.0), trail | |
def route_prompt(prompt: str, complexity_override: str) -> RoutingDecision:
    """Determine which provider should handle this prompt.

    Pipeline: (1) regex fast-path for trivial prompts, (2) heuristic
    complexity scoring, (3) threshold-based tier selection, and (4) a
    circuit-breaker health check (simulated -- always healthy here).

    Args:
        prompt: The user prompt to route.
        complexity_override: "simple" or "complex" to force a tier; any
            other value lets the scorer decide.

    Returns:
        A RoutingDecision with the chosen provider, latency/cost estimates,
        and a human-readable decision trail.
    """
    decision_path: list[str] = []
    # Step 1: Check for regex-matched simple patterns
    decision_path.append("[1] Checking regex rules for trivial patterns...")
    # An explicit override skips the regex fast-path; the score clamp in
    # step 2 then drives the decision instead.
    if complexity_override == "simple":
        decision_path.append(" User override: complexity=simple")
    elif complexity_override == "complex":
        decision_path.append(" User override: complexity=complex")
    else:
        for pattern in SIMPLE_PATTERNS:
            if pattern.search(prompt):
                decision_path.append(f" Matched pattern: {pattern.pattern}")
                decision_path.append(" -> Route to Ollama (local, fast, free)")
                # Trivial prompts short-circuit to the free local tier.
                return RoutingDecision(
                    provider="Ollama (local)",
                    reason="Matched simple-pattern regex rule",
                    latency_estimate_ms=random.randint(80, 250),
                    cost_estimate_usd=0.0,
                    decision_path=decision_path,
                    confidence=0.95,
                )
        decision_path.append(" No regex match")
    # Step 2: Complexity scoring
    decision_path.append("[2] Running complexity scorer...")
    score, trail = _compute_complexity_score(prompt)
    decision_path.extend(f" {line}" for line in trail)
    # Apply override
    if complexity_override == "simple":
        score = min(score, 0.2)
        decision_path.append(" Override applied: clamped score to <= 0.2")
    elif complexity_override == "complex":
        score = max(score, 0.7)
        decision_path.append(" Override applied: raised score to >= 0.7")
    # Step 3: Provider selection based on tier thresholds
    OLLAMA_CEILING = 0.25
    GEMINI_CEILING = 0.55
    decision_path.append(
        f"[3] Selecting provider (thresholds: "
        f"Ollama <{OLLAMA_CEILING}, Gemini <{GEMINI_CEILING}, Claude >={GEMINI_CEILING})..."
    )
    if score < OLLAMA_CEILING:
        provider = "Ollama (local)"
        latency = random.randint(80, 300)
        cost = 0.0
        decision_path.append(f" Score {score:.3f} < {OLLAMA_CEILING} -> Ollama (local)")
    elif score < GEMINI_CEILING:
        provider = "Gemini (Google)"
        latency = random.randint(400, 1200)
        cost = round(random.uniform(0.001, 0.008), 4)
        decision_path.append(
            f" Score {OLLAMA_CEILING} <= {score:.3f} < {GEMINI_CEILING} -> Gemini (Google)"
        )
    else:
        provider = "Claude (Anthropic)"
        latency = random.randint(800, 2500)
        cost = round(random.uniform(0.005, 0.025), 4)
        decision_path.append(f" Score {score:.3f} >= {GEMINI_CEILING} -> Claude (Anthropic)")
    # Step 4: Circuit breaker check (simulated -- always healthy in demo)
    decision_path.append("[4] Circuit breaker check: all providers HEALTHY")
    decision_path.append(f"[5] Final decision: {provider}")
    # Confidence grows linearly with the score (0.70 at 0.0 up to 0.95 at 1.0).
    return RoutingDecision(
        provider=provider,
        reason=f"Complexity score {score:.3f} routed to {provider}",
        latency_estimate_ms=latency,
        cost_estimate_usd=cost,
        decision_path=decision_path,
        confidence=round(0.7 + score * 0.25, 2),
    )
def handle_gateway_request(
    prompt: str, complexity: str
) -> tuple[str, str, str, str, str]:
    """Process a gateway routing request. Returns 5 strings for the UI outputs."""
    cleaned = prompt.strip() if prompt else ""
    if not cleaned:
        placeholder = "Enter a prompt above to see routing in action."
        return placeholder, "", "", "", ""
    decision = route_prompt(cleaned, complexity.lower())
    # Provider badge with routing confidence.
    badge = (
        f"**{decision.provider}**\n\n"
        f"Confidence: {decision.confidence:.0%}"
    )
    # Latency / cost estimates plus the one-line routing rationale.
    metrics = (
        f"**Estimated Latency:** {decision.latency_estimate_ms} ms\n\n"
        f"**Estimated Cost:** ${decision.cost_estimate_usd:.4f}\n\n"
        f"**Reason:** {decision.reason}"
    )
    # Decision trail rendered as a fenced code block.
    tree = "```\n" + "\n".join(decision.decision_path) + "\n```"
    # Canned provider answer standing in for a real completion.
    answer = (
        f"**[{decision.provider}]** (simulated)\n\n"
        f"{MOCK_RESPONSES[decision.provider]}"
    )
    # Simulated cache entry; the key hashes the raw (unstripped) prompt.
    key = hashlib.md5(prompt.encode()).hexdigest()[:12]
    cache = (
        f"**Cache Key:** `{key}`\n\n"
        f"**Cache Status:** MISS (first request)\n\n"
        f"**TTL:** 3600s\n\n"
        f"Subsequent identical prompts would return cached response "
        f"with <5ms latency and $0.00 cost."
    )
    return badge, metrics, tree, answer, cache
| # --------------------------------------------------------------------------- | |
| # Tab 2: HNSW Vector Search | |
| # --------------------------------------------------------------------------- | |
# Demo corpus for the vector-search tab: 50 one-line ML/AI statements.
# Embeddings for these are precomputed into _CORPUS_EMBEDDINGS below.
SENTENCES: list[str] = [
    "Neural networks learn hierarchical representations of data",
    "Transformers use self-attention mechanisms for sequence modeling",
    "Gradient descent optimizes model parameters iteratively",
    "Convolutional neural networks excel at image recognition tasks",
    "Recurrent neural networks process sequential data with memory",
    "Generative adversarial networks create realistic synthetic data",
    "Transfer learning reuses pretrained models for new tasks",
    "Reinforcement learning agents learn through reward signals",
    "Natural language processing enables machines to understand text",
    "Computer vision systems interpret and analyze visual information",
    "Embeddings represent discrete objects as continuous vectors",
    "Attention mechanisms allow models to focus on relevant inputs",
    "Backpropagation computes gradients through computational graphs",
    "Batch normalization stabilizes and accelerates neural network training",
    "Dropout regularization prevents overfitting in deep networks",
    "Learning rate scheduling improves convergence during training",
    "Data augmentation increases training set diversity artificially",
    "Hyperparameter tuning optimizes model configuration systematically",
    "Cross-validation estimates model performance on unseen data",
    "Feature engineering transforms raw data into informative representations",
    "Dimensionality reduction compresses high-dimensional data efficiently",
    "Clustering algorithms group similar data points together",
    "Decision trees split data based on feature thresholds",
    "Random forests combine multiple decision trees for robustness",
    "Support vector machines find optimal separating hyperplanes",
    "K-nearest neighbors classifies based on proximity in feature space",
    "Principal component analysis finds directions of maximum variance",
    "Autoencoders learn compressed representations through reconstruction",
    "Variational autoencoders generate new samples from learned distributions",
    "Graph neural networks operate on graph-structured data",
    "Federated learning trains models across decentralized data sources",
    "Differential privacy adds noise to protect individual data points",
    "Model distillation compresses large models into smaller ones",
    "Quantization reduces model size by lowering numerical precision",
    "Pruning removes unnecessary weights from neural networks",
    "ONNX provides a standard format for neural network interchange",
    "TensorRT optimizes models for inference on NVIDIA hardware",
    "Vector databases enable efficient similarity search at scale",
    "Retrieval augmented generation combines search with language models",
    "Prompt engineering designs effective inputs for language models",
    "Fine-tuning adapts pretrained models to specific domains",
    "RLHF aligns language models with human preferences",
    "Chain of thought prompting improves reasoning in language models",
    "Multi-modal models process text images and audio together",
    "Tokenization converts text into numerical sequences for models",
    "Beam search explores multiple decoding paths simultaneously",
    "Temperature scaling controls randomness in model generation",
    "Contrastive learning trains models by comparing positive and negative pairs",
    "Self-supervised learning extracts labels from the data itself",
    "Few-shot learning enables models to learn from minimal examples",
]
| def _deterministic_embed(text: str, dim: int = 64) -> np.ndarray: | |
| """Generate a deterministic pseudo-embedding from text content. | |
| Uses character-level hashing with trigram overlap to produce vectors | |
| where semantically similar sentences get closer embeddings. This is | |
| not a real language model -- it is a reproducible approximation that | |
| makes the demo meaningful without any ML dependencies. | |
| """ | |
| rng = np.random.RandomState( | |
| int(hashlib.sha256(text.lower().encode()).hexdigest(), 16) % (2**31) | |
| ) | |
| base = rng.randn(dim).astype(np.float32) | |
| # Add trigram-based signal so overlapping words produce closer vectors | |
| words = text.lower().split() | |
| for word in words: | |
| word_seed = int(hashlib.md5(word.encode()).hexdigest(), 16) % (2**31) | |
| word_rng = np.random.RandomState(word_seed) | |
| base += word_rng.randn(dim).astype(np.float32) * 0.3 | |
| norm = np.linalg.norm(base) | |
| if norm > 0: | |
| base /= norm | |
| return base | |
# Pre-compute corpus embeddings at module load.
# Shape is (len(SENTENCES), 64); rows are unit-normalized by
# _deterministic_embed, so dot products against a normalized query vector
# are cosine similarities.
_CORPUS_EMBEDDINGS: np.ndarray = np.stack(
    [_deterministic_embed(s) for s in SENTENCES]
)
| def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: | |
| dot = float(np.dot(a, b)) | |
| norm_a = float(np.linalg.norm(a)) | |
| norm_b = float(np.linalg.norm(b)) | |
| if norm_a == 0 or norm_b == 0: | |
| return 0.0 | |
| return dot / (norm_a * norm_b) | |
def _search_vectors(
    query: str, top_k: int = 8
) -> list[tuple[str, float, int]]:
    """Search the corpus and return (sentence, similarity, index) tuples."""
    # Corpus rows and the query embedding are unit-length, so a single
    # matrix-vector product yields cosine similarities for every sentence.
    scores = _CORPUS_EMBEDDINGS @ _deterministic_embed(query)
    ranked = np.argsort(scores)[::-1][:top_k]
    return [(SENTENCES[int(i)], float(scores[int(i)]), int(i)) for i in ranked]
def _build_vector_plot(
    query: str, results: list[tuple[str, float, int]]
) -> plt.Figure:
    """Build a 2D PCA-like projection of the vector space highlighting results.

    Args:
        query: Raw query text; re-embedded here to place the query marker.
        results: (sentence, similarity, index) tuples from _search_vectors.

    Returns:
        A dark-themed matplotlib Figure: grey corpus points, blue result
        points sized/shaded by similarity with connector lines, and the
        query drawn as a red star.
    """
    # Simple 2D projection using first two principal-ish components
    # (deterministic random projection for consistency)
    rng = np.random.RandomState(42)
    projection_matrix = rng.randn(64, 2).astype(np.float32)
    # Normalize each projection column so the two output axes are comparable.
    projection_matrix /= np.linalg.norm(projection_matrix, axis=0, keepdims=True)
    all_2d = _CORPUS_EMBEDDINGS @ projection_matrix
    query_vec = _deterministic_embed(query)
    query_2d = query_vec @ projection_matrix
    result_indices = {r[2] for r in results}
    fig, ax = plt.subplots(figsize=(8, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(DARK_SURFACE)
    # Plot all corpus points (non-results only, dimmed)
    non_result_mask = np.array(
        [i not in result_indices for i in range(len(SENTENCES))]
    )
    ax.scatter(
        all_2d[non_result_mask, 0],
        all_2d[non_result_mask, 1],
        c=TEXT_SECONDARY,
        alpha=0.3,
        s=20,
        label="Corpus",
    )
    # Plot result points -- marker size and opacity scale with similarity
    for sentence, sim, idx in results:
        ax.scatter(
            all_2d[idx, 0],
            all_2d[idx, 1],
            c=ACCENT_BLUE,
            alpha=max(0.4, sim),
            s=60 + sim * 80,
            zorder=5,
        )
        # Draw line from query to result
        ax.plot(
            [query_2d[0], all_2d[idx, 0]],
            [query_2d[1], all_2d[idx, 1]],
            color=ACCENT_BLUE,
            alpha=0.15 + sim * 0.3,
            linewidth=0.8,
        )
    # Plot query point
    ax.scatter(
        query_2d[0],
        query_2d[1],
        c=ACCENT_RED,
        s=120,
        marker="*",
        zorder=10,
        label="Query",
    )
    ax.set_title(
        "Vector Space Projection (2D)",
        color=TEXT_PRIMARY,
        fontsize=13,
        fontweight="bold",
        pad=12,
    )
    ax.tick_params(colors=TEXT_SECONDARY, labelsize=8)
    for spine in ax.spines.values():
        spine.set_color(DARK_BORDER)
    ax.legend(
        facecolor=DARK_SURFACE,
        edgecolor=DARK_BORDER,
        labelcolor=TEXT_PRIMARY,
        fontsize=9,
    )
    fig.tight_layout()
    return fig
def handle_vector_search(query: str) -> tuple[str, Any]:
    """Run vector search and return results markdown + plot."""
    cleaned = query.strip() if query else ""
    if not cleaned:
        return "Enter a query to search the vector corpus.", None
    t0 = time.perf_counter()
    results = _search_vectors(cleaned)
    elapsed_us = (time.perf_counter() - t0) * 1_000_000
    # Assemble the markdown report: header, stats line, then a ranked table.
    md = [
        f"**Query:** {cleaned}\n",
        f"**Search time:** {elapsed_us:.0f} us | "
        f"**Corpus size:** {len(SENTENCES)} sentences | "
        f"**Embedding dim:** 64\n",
        "---\n",
        "| Rank | Similarity | Sentence |",
        "|------|-----------|----------|",
    ]
    for rank, (sentence, sim, _) in enumerate(md_rows := results, 1):
        filled = int(sim * 20)
        gauge = "+" * filled + "-" * (20 - filled)
        md.append(f"| {rank} | `{sim:.4f}` [{gauge}] | {sentence} |")
    md.append("\n---\n")
    md.append(
        "*Embeddings are deterministic pseudo-vectors (trigram hashing). "
        "In production, Citadel uses real transformer embeddings with "
        "a custom HNSW index supporting millions of vectors.*"
    )
    return "\n".join(md), _build_vector_plot(cleaned, results)
| # --------------------------------------------------------------------------- | |
| # Tab 3: ReAct Agent Runtime | |
| # --------------------------------------------------------------------------- | |
# Scripted ReAct traces for the agent tab, keyed by scenario type.
# Each step dict carries thought / action / action_input / observation;
# "<TOPIC>" placeholders are substituted with the user's task text by
# handle_agent_task. An action of "FINAL_ANSWER" terminates the loop.
AGENT_SCENARIOS: dict[str, list[dict[str, str]]] = {
    "research": [
        {
            "thought": (
                "The user wants me to research a topic. I should start by "
                "searching for relevant information, then synthesize findings."
            ),
            "action": "search_knowledge_base",
            "action_input": '{"query": "<TOPIC>", "top_k": 5}',
            "observation": (
                "Found 5 relevant documents:\n"
                " [1] Overview and fundamentals (relevance: 0.92)\n"
                " [2] Recent developments in 2025 (relevance: 0.87)\n"
                " [3] Comparison with alternative approaches (relevance: 0.81)\n"
                " [4] Implementation best practices (relevance: 0.76)\n"
                " [5] Common pitfalls and solutions (relevance: 0.71)"
            ),
        },
        {
            "thought": (
                "I have foundational documents. Let me check for any recent "
                "updates or corrections to ensure accuracy."
            ),
            "action": "web_search",
            "action_input": '{"query": "<TOPIC> latest developments 2026"}',
            "observation": (
                "3 recent results found:\n"
                " [1] New benchmark results published March 2026\n"
                " [2] Industry adoption report Q1 2026\n"
                " [3] Critical vulnerability patch released Feb 2026"
            ),
        },
        {
            "thought": (
                "I now have both historical context and recent updates. "
                "I can synthesize a comprehensive answer. The vulnerability "
                "patch in result [3] is important context to include."
            ),
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
    "code": [
        {
            "thought": (
                "The user needs code assistance. I should first understand "
                "the codebase structure before making changes."
            ),
            "action": "read_file",
            "action_input": '{"path": "src/main.py"}',
            "observation": (
                "File contents (247 lines):\n"
                " - Class: ApplicationServer (lines 12-180)\n"
                " - Function: configure_routes (lines 182-220)\n"
                " - Function: main (lines 222-247)\n"
                " Dependencies: fastapi, uvicorn, pydantic"
            ),
        },
        {
            "thought": (
                "I see the structure. The ApplicationServer class handles "
                "routing. I need to check the test file to understand "
                "expected behavior before modifying anything."
            ),
            "action": "read_file",
            "action_input": '{"path": "tests/test_main.py"}',
            "observation": (
                "Test file (89 lines):\n"
                " - test_server_startup: verifies port binding\n"
                " - test_health_endpoint: checks /health returns 200\n"
                " - test_graceful_shutdown: ensures cleanup on SIGTERM\n"
                " All 3 tests currently passing"
            ),
        },
        {
            "thought": (
                "I understand the codebase and test expectations. I can now "
                "implement the requested change while maintaining backward "
                "compatibility with existing tests."
            ),
            "action": "write_file",
            "action_input": '{"path": "src/main.py", "changes": "..."}',
            "observation": "File updated successfully. Running tests...\n 3/3 tests passing. No regressions.",
        },
        {
            "thought": "The change is implemented and all tests pass. I can provide the final answer with a summary of what changed.",
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
    "analysis": [
        {
            "thought": (
                "The user wants data analysis. I should first retrieve the "
                "relevant dataset and understand its schema."
            ),
            "action": "query_database",
            "action_input": '{"sql": "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = \'metrics\'"}',
            "observation": (
                "Schema for 'metrics' table:\n"
                " - timestamp: DATETIME (indexed)\n"
                " - provider: VARCHAR(50)\n"
                " - latency_ms: FLOAT\n"
                " - tokens_used: INTEGER\n"
                " - cost_usd: DECIMAL(10,6)\n"
                " - status_code: INTEGER\n"
                " Total rows: 142,847"
            ),
        },
        {
            "thought": (
                "Good schema understanding. Now I need to run the analytical "
                "query the user is interested in. Let me aggregate by provider."
            ),
            "action": "query_database",
            "action_input": (
                '{"sql": "SELECT provider, COUNT(*) as requests, '
                "AVG(latency_ms) as avg_latency, SUM(cost_usd) as total_cost "
                "FROM metrics WHERE timestamp > '2026-02-01' "
                'GROUP BY provider ORDER BY requests DESC"}'
            ),
            "observation": (
                "Results:\n"
                " | Provider | Requests | Avg Latency | Total Cost |\n"
                " |----------|----------|-------------|------------|\n"
                " | Ollama | 89,234 | 142ms | $0.00 |\n"
                " | Gemini | 38,991 | 687ms | $12.47 |\n"
                " | Claude | 14,622 | 1,284ms | $48.93 |"
            ),
        },
        {
            "thought": (
                "Clear picture: Ollama handles the bulk of requests at zero "
                "cost, Gemini is the middle tier, and Claude handles complex "
                "queries at higher cost but lower volume. The routing strategy "
                "is working as designed. I can now present findings."
            ),
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
}
def _select_scenario(task: str) -> tuple[str, list[dict[str, str]]]:
    """Pick the most relevant scenario based on task keywords."""
    lowered = task.lower()
    code_cues = ("code", "implement", "fix", "bug", "refactor", "function")
    data_cues = ("data", "analyz", "metric", "query", "report", "stats")
    # Code cues win over data cues; anything else falls back to research.
    if any(cue in lowered for cue in code_cues):
        key = "code"
    elif any(cue in lowered for cue in data_cues):
        key = "analysis"
    else:
        key = "research"
    return key, AGENT_SCENARIOS[key]
def handle_agent_task(task: str) -> str:
    """Generate a ReAct agent trace for the given task."""
    if not task or not task.strip():
        return "Enter a task above to see the ReAct agent reasoning loop."
    topic = task.strip()
    scenario_type, steps = _select_scenario(topic)
    # Markdown header block for the trace.
    out: list[str] = [
        "## ReAct Agent Trace\n",
        f"**Task:** {topic}\n",
        f"**Scenario type:** {scenario_type}\n",
        "**Registered tools:** search_knowledge_base, web_search, read_file, "
        "write_file, query_database, execute_code\n",
        "---\n",
    ]
    for index, step in enumerate(steps, 1):
        # Substitute the user's topic into the scripted step text.
        thought = step["thought"].replace("<TOPIC>", topic)
        tool = step["action"]
        tool_input = step["action_input"].replace("<TOPIC>", topic)
        obs = step["observation"].replace("<TOPIC>", topic)
        out.append(f"### Step {index}\n")
        out.append(f"**Thought:** {thought}\n")
        if tool == "FINAL_ANSWER":
            out.append("**Action:** `FINAL_ANSWER`\n")
            out.append(
                f"**Result:** Based on the information gathered across "
                f"{index - 1} tool invocations, I have synthesized a "
                f"comprehensive response to the user's request regarding "
                f"*{topic}*.\n"
            )
        else:
            out.append(f"**Action:** `{tool}({tool_input})`\n")
            out.append(f"**Observation:**\n```\n{obs}\n```\n")
        out.append("---\n")
    out.append(
        f"**Agent completed in {len(steps)} steps "
        f"({len(steps) - 1} tool calls + final answer)**\n\n"
        f"*In production, Citadel's agent runtime executes real tool calls "
        f"with timeout handling, retry logic, and full observability tracing.*"
    )
    return "\n".join(out)
| # --------------------------------------------------------------------------- | |
| # Tab 4: Observability Dashboard | |
| # --------------------------------------------------------------------------- | |
| def _generate_timeseries( | |
| hours: int = 24, | |
| base_rate: float = 50.0, | |
| noise: float = 15.0, | |
| trend: float = 0.5, | |
| ) -> tuple[list[str], list[float]]: | |
| """Generate realistic-looking time series data.""" | |
| rng = random.Random(42) | |
| timestamps = [] | |
| values = [] | |
| for h in range(hours): | |
| # Simulate daily pattern: lower at night, higher during day | |
| hour_of_day = h % 24 | |
| daily_factor = 0.5 + 0.5 * math.sin((hour_of_day - 6) * math.pi / 12) | |
| value = base_rate * daily_factor + trend * h + rng.gauss(0, noise) | |
| timestamps.append(f"{h:02d}:00") | |
| values.append(max(0, value)) | |
| return timestamps, values | |
def build_observability_dashboard() -> tuple[Any, str]:
    """Build the observability charts and metrics summary.

    Returns:
        (fig, summary): a 2x2 Plotly figure (RPS series, p50/p95 latency,
        cost-by-provider bars, token-usage pie) and a markdown metrics
        summary table. All data is simulated with a fixed seed (42) so the
        dashboard is reproducible across reloads.
    """
    rng = random.Random(42)
    # Generate data
    hours = 24
    timestamps, rps_values = _generate_timeseries(hours, 45, 12, 0.3)
    p50_latencies = [80 + rng.gauss(0, 15) + 20 * math.sin(i * 0.3) for i in range(hours)]
    # p95 modeled as a noisy ~2.5x multiple of p50.
    p95_latencies = [lat * (2.5 + rng.gauss(0, 0.3)) for lat in p50_latencies]
    provider_requests = {"Ollama": 62_340, "Gemini": 27_891, "Claude": 10_244}
    provider_costs = {"Ollama": 0.0, "Gemini": 8.94, "Claude": 34.21}
    provider_errors = {"Ollama": 12, "Gemini": 47, "Claude": 8}
    token_usage = {
        "Prompt tokens": 2_847_291,
        "Completion tokens": 1_423_886,
        "Cached tokens": 891_204,
    }
    # Build plotly figure with subplots
    # Bottom-right cell is "domain" type to support Pie chart
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=(
            "Requests per Second (24h)",
            "Latency Distribution (p50 / p95)",
            "Cost by Provider",
            "Token Usage Breakdown",
        ),
        specs=[
            [{"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "domain"}],
        ],
        vertical_spacing=0.14,
        horizontal_spacing=0.10,
    )
    # Chart 1: RPS time series (area fill under the line)
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=rps_values,
            mode="lines",
            name="req/s",
            line=dict(color=ACCENT_BLUE, width=2),
            fill="tozeroy",
            fillcolor="rgba(88, 166, 255, 0.1)",
        ),
        row=1,
        col=1,
    )
    # Chart 2: Latency (two overlaid lines, p50 and p95)
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=p50_latencies,
            mode="lines",
            name="p50",
            line=dict(color=ACCENT_GREEN, width=2),
        ),
        row=1,
        col=2,
    )
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=p95_latencies,
            mode="lines",
            name="p95",
            line=dict(color=ACCENT_ORANGE, width=2),
        ),
        row=1,
        col=2,
    )
    # Chart 3: Cost by provider (bar)
    providers = list(provider_costs.keys())
    costs = list(provider_costs.values())
    colors = [ACCENT_GREEN, ACCENT_ORANGE, ACCENT_BLUE]
    fig.add_trace(
        go.Bar(
            x=providers,
            y=costs,
            name="Cost ($)",
            marker_color=colors,
            text=[f"${c:.2f}" for c in costs],
            textposition="outside",
            textfont=dict(color=TEXT_PRIMARY),
        ),
        row=2,
        col=1,
    )
    # Chart 4: Token usage (pie)
    fig.add_trace(
        go.Pie(
            labels=list(token_usage.keys()),
            values=list(token_usage.values()),
            marker=dict(colors=[ACCENT_BLUE, ACCENT_GREEN, ACCENT_ORANGE]),
            textinfo="label+percent",
            textfont=dict(color=TEXT_PRIMARY, size=11),
            hole=0.4,
        ),
        row=2,
        col=2,
    )
    # Style: dark theme for the whole figure and its legend.
    fig.update_layout(
        height=620,
        paper_bgcolor=DARK_BG,
        plot_bgcolor=DARK_SURFACE,
        font=dict(color=TEXT_PRIMARY, size=11),
        showlegend=True,
        legend=dict(
            bgcolor=DARK_SURFACE,
            bordercolor=DARK_BORDER,
            font=dict(color=TEXT_PRIMARY),
        ),
        margin=dict(t=40, b=30, l=50, r=30),
    )
    # Subplot titles are stored as layout annotations; recolor them here.
    for annotation in fig.layout.annotations:
        annotation.font = dict(color=TEXT_PRIMARY, size=12)
    # Style only the XY subplot axes (bottom-right is domain type, no axes)
    for axis_name in ["xaxis", "xaxis2", "xaxis3"]:
        fig.layout[axis_name].gridcolor = DARK_BORDER
        fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY)
    for axis_name in ["yaxis", "yaxis2", "yaxis3"]:
        fig.layout[axis_name].gridcolor = DARK_BORDER
        fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY)
    # Metrics summary (markdown tables shown beside the charts)
    total_requests = sum(provider_requests.values())
    total_cost = sum(provider_costs.values())
    total_errors = sum(provider_errors.values())
    error_rate = total_errors / total_requests * 100
    avg_p50 = sum(p50_latencies) / len(p50_latencies)
    avg_p95 = sum(p95_latencies) / len(p95_latencies)
    summary_lines = [
        "## Summary Metrics (24h window)\n",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Total requests | {total_requests:,} |",
        f"| Avg requests/sec | {total_requests / 86400:.1f} |",
        f"| p50 latency | {avg_p50:.0f} ms |",
        f"| p95 latency | {avg_p95:.0f} ms |",
        f"| Total cost | ${total_cost:.2f} |",
        f"| Cost per request | ${total_cost / total_requests:.6f} |",
        f"| Total errors | {total_errors:,} |",
        f"| Error rate | {error_rate:.3f}% |",
        f"| Total tokens | {sum(token_usage.values()):,} |",
        f"| Cache hit rate | {token_usage['Cached tokens'] / sum(token_usage.values()) * 100:.1f}% |",
        "\n---\n",
        "### Provider Breakdown\n",
        "| Provider | Requests | Cost | Errors | Error Rate |",
        "|----------|----------|------|--------|------------|",
    ]
    for provider in providers:
        req = provider_requests[provider]
        cost = provider_costs[provider]
        err = provider_errors[provider]
        erate = err / req * 100
        summary_lines.append(
            f"| {provider} | {req:,} | ${cost:.2f} | {err} | {erate:.3f}% |"
        )
    summary_lines.append(
        "\n*Metrics are simulated for demonstration. In production, "
        "citadel-observe collects real telemetry via OpenTelemetry-compatible "
        "exporters with Prometheus/Grafana integration.*"
    )
    return fig, "\n".join(summary_lines)
| # --------------------------------------------------------------------------- | |
| # Gradio Application | |
| # --------------------------------------------------------------------------- | |
def _dark_theme() -> gr.themes.Base:
    """Build the GitHub-dark inspired theme shared by the whole app."""
    return gr.themes.Base(
        primary_hue=gr.themes.colors.blue,
        secondary_hue=gr.themes.colors.gray,
        neutral_hue=gr.themes.colors.gray,
        font=gr.themes.GoogleFont("Inter"),
        font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    ).set(
        body_background_fill=DARK_BG,
        body_background_fill_dark=DARK_BG,
        block_background_fill=DARK_SURFACE,
        block_background_fill_dark=DARK_SURFACE,
        block_border_color=DARK_BORDER,
        block_border_color_dark=DARK_BORDER,
        input_background_fill="#0d1117",
        input_background_fill_dark="#0d1117",
        button_primary_background_fill=ACCENT_BLUE,
        button_primary_background_fill_dark=ACCENT_BLUE,
        button_primary_text_color="#ffffff",
        button_primary_text_color_dark="#ffffff",
    )


def _build_gateway_tab() -> None:
    """Tab 1: prompt -> provider routing demo wired to handle_gateway_request."""
    with gr.Tab("LLM Gateway"):
        gr.Markdown(
            "### Multi-Provider Routing Engine\n"
            "Enter a prompt and see how Citadel's gateway routes it to the "
            "optimal provider based on complexity analysis, regex rules, "
            "and cost/latency trade-offs."
        )
        with gr.Row():
            with gr.Column(scale=3):
                gateway_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Try: 'Hello' (simple) or 'Analyze the security implications of...' (complex)",
                    lines=3,
                )
            with gr.Column(scale=1):
                complexity_selector = gr.Radio(
                    choices=["Auto", "Simple", "Moderate", "Complex"],
                    value="Auto",
                    label="Complexity Override",
                )
        gateway_btn = gr.Button("Route Request", variant="primary")
        with gr.Row():
            provider_output = gr.Markdown(label="Selected Provider")
            metrics_output = gr.Markdown(label="Routing Metrics")
            cache_output = gr.Markdown(label="Cache Layer")
        with gr.Row():
            with gr.Column(scale=1):
                tree_output = gr.Markdown(label="Decision Path")
            with gr.Column(scale=1):
                response_output = gr.Markdown(label="Mock Response")
        # One shared outputs list instead of two verbatim copies.
        gateway_outputs = [
            provider_output,
            metrics_output,
            tree_output,
            response_output,
            cache_output,
        ]
        gateway_btn.click(
            fn=handle_gateway_request,
            inputs=[gateway_input, complexity_selector],
            outputs=gateway_outputs,
        )
        # Also trigger on Enter
        gateway_input.submit(
            fn=handle_gateway_request,
            inputs=[gateway_input, complexity_selector],
            outputs=gateway_outputs,
        )


def _build_vector_tab() -> None:
    """Tab 2: cosine-similarity search demo wired to handle_vector_search."""
    with gr.Tab("HNSW Vector Search"):
        gr.Markdown(
            "### Nearest Neighbor Search\n"
            "Search a corpus of 50 AI/ML sentences using cosine similarity. "
            "The HNSW index in production supports millions of vectors with "
            "sub-millisecond lookup. This demo uses a simplified "
            "embedding model for illustration."
        )
        with gr.Row():
            with gr.Column(scale=3):
                vector_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Try: 'how do transformers work' or 'reducing model size'",
                    lines=1,
                )
            with gr.Column(scale=1):
                vector_btn = gr.Button("Search Vectors", variant="primary")
        vector_results = gr.Markdown(label="Search Results")
        vector_plot = gr.Plot(label="Vector Space Visualization")
        # Button click and Enter both run the same search.
        vector_btn.click(
            fn=handle_vector_search,
            inputs=[vector_input],
            outputs=[vector_results, vector_plot],
        )
        vector_input.submit(
            fn=handle_vector_search,
            inputs=[vector_input],
            outputs=[vector_results, vector_plot],
        )


def _build_agent_tab() -> None:
    """Tab 3: ReAct agent trace demo wired to handle_agent_task."""
    with gr.Tab("Agent Runtime"):
        gr.Markdown(
            "### ReAct Agent Reasoning Loop\n"
            "Enter a task and see how Citadel's agent runtime decomposes "
            "it into a Thought-Action-Observation cycle. The agent selects "
            "tools, processes results, and builds toward a final answer."
        )
        with gr.Row():
            with gr.Column(scale=3):
                agent_input = gr.Textbox(
                    label="Task",
                    placeholder="Try: 'Research vector databases' or 'Fix the login bug' or 'Analyze API latency trends'",
                    lines=2,
                )
            with gr.Column(scale=1):
                agent_btn = gr.Button("Run Agent", variant="primary")
        agent_output = gr.Markdown(
            label="Agent Trace",
            elem_classes=["agent-trace"],
        )
        agent_btn.click(
            fn=handle_agent_task,
            inputs=[agent_input],
            outputs=[agent_output],
        )
        agent_input.submit(
            fn=handle_agent_task,
            inputs=[agent_input],
            outputs=[agent_output],
        )


def _build_observability_tab(app: gr.Blocks) -> None:
    """Tab 4: mock metrics dashboard.

    Needs the enclosing Blocks instance so the dashboard can be rendered
    once on page load via ``app.load`` as well as on manual refresh.
    """
    with gr.Tab("Observability"):
        gr.Markdown(
            "### Operations Dashboard\n"
            "Real-time monitoring of the Citadel platform. Request rates, "
            "latency percentiles, cost tracking, and token usage -- "
            "everything you need to operate an AI system in production."
        )
        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
        obs_plot = gr.Plot(label="Dashboard Charts")
        obs_summary = gr.Markdown(label="Metrics Summary")
        # Load on page open
        app.load(
            fn=build_observability_dashboard,
            outputs=[obs_plot, obs_summary],
        )
        refresh_btn.click(
            fn=build_observability_dashboard,
            outputs=[obs_plot, obs_summary],
        )


def build_app() -> gr.Blocks:
    """Construct the Gradio Blocks application.

    Assembles the page header plus the four demo tabs (gateway, vector
    search, agent runtime, observability). Each tab is built by a private
    helper; components created inside the helpers register against the
    active ``gr.Blocks`` context entered here.

    Returns:
        The fully wired (but not yet launched) ``gr.Blocks`` app.
    """
    with gr.Blocks(
        title="Citadel -- AI Operations Platform",
        theme=_dark_theme(),
        css=CUSTOM_CSS,
    ) as app:
        gr.Markdown(
            """
            # Citadel -- AI Operations Platform
            Production-grade AI infrastructure built from first principles.
            LLM gateway | Vector search | Agent runtime | Observability
            [GitHub](https://github.com/dbhavery/citadel)
            """,
        )
        _build_gateway_tab()
        _build_vector_tab()
        _build_agent_tab()
        _build_observability_tab(app)
    return app
| # --------------------------------------------------------------------------- | |
| # Entry point | |
| # --------------------------------------------------------------------------- | |
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard
    # Hugging Face Spaces port; surface handler errors in the browser.
    demo = build_app()
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)