# dbhavery's picture
# Upload folder using huggingface_hub
# c769e40 verified
"""
Citadel Explorer -- Interactive demo for the Citadel AI Operations Platform.
Showcases LLM gateway routing, HNSW vector search, ReAct agent traces,
and observability dashboards. All demos use mock data; no API keys required.
Source: https://github.com/dbhavery/citadel
"""
from __future__ import annotations
import hashlib
import math
import random
import re
import time
from dataclasses import dataclass, field
from typing import Any
import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
matplotlib.use("Agg")
# ---------------------------------------------------------------------------
# Shared constants
# ---------------------------------------------------------------------------
# GitHub-dark-inspired palette reused by the matplotlib, plotly, and Gradio
# styling below so all four tabs render consistently.
DARK_BG = "#0d1117"        # page / figure background
DARK_SURFACE = "#161b22"   # card and axes background
DARK_BORDER = "#30363d"    # borders, spines, grid lines
ACCENT_BLUE = "#58a6ff"
ACCENT_GREEN = "#3fb950"
ACCENT_ORANGE = "#d29922"
ACCENT_RED = "#f85149"
TEXT_PRIMARY = "#e6edf3"
TEXT_SECONDARY = "#8b949e"
# Accent color per mock provider (used for badges and charts).
PROVIDER_COLORS = {
    "Ollama (local)": ACCENT_GREEN,
    "Claude (Anthropic)": ACCENT_BLUE,
    "Gemini (Google)": ACCENT_ORANGE,
}
# Extra CSS injected into the Gradio page: dark-mode variable overrides,
# monospace styling for the agent trace, and hiding the default footer.
CUSTOM_CSS = """
.dark {
--body-background-fill: #0d1117 !important;
--background-fill-primary: #161b22 !important;
--background-fill-secondary: #0d1117 !important;
--border-color-primary: #30363d !important;
--block-label-text-color: #8b949e !important;
--input-background-fill: #0d1117 !important;
}
.agent-trace {
font-family: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', monospace;
font-size: 13px;
line-height: 1.6;
}
.metric-card {
border: 1px solid #30363d;
border-radius: 8px;
padding: 16px;
background: #161b22;
}
footer { display: none !important; }
"""
# ---------------------------------------------------------------------------
# Tab 1: LLM Gateway -- Routing Logic
# ---------------------------------------------------------------------------
# Regexes marking a prompt as trivial; any match short-circuits routing
# straight to the free local model (see route_prompt).
SIMPLE_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"\b(hello|hi|hey|thanks|bye|yes|no|ok)\b", re.IGNORECASE),
    re.compile(r"\bwhat (time|day|date)\b", re.IGNORECASE),
    re.compile(r"\bdefine\s+\w+\b", re.IGNORECASE),
    re.compile(r"\btranslate\b", re.IGNORECASE),
]
# Substrings whose presence raises the complexity score (matched against the
# lowercased prompt in _compute_complexity_score).
COMPLEX_KEYWORDS: list[str] = [
    "analyze", "architecture", "compare", "design", "evaluate", "explain why",
    "implement", "optimize", "refactor", "review", "security", "trade-off",
    "tradeoff", "vulnerability", "debug", "performance",
]
# Canned per-provider reply text shown in the UI -- no real API calls are made.
MOCK_RESPONSES: dict[str, str] = {
    "Ollama (local)": (
        "This is a straightforward request. Here is the answer based on my "
        "local knowledge base, processed entirely on-device with zero latency "
        "to external APIs."
    ),
    "Claude (Anthropic)": (
        "I have analyzed your request carefully. This requires nuanced reasoning "
        "across multiple dimensions. Let me walk through the key considerations "
        "and provide a structured response with concrete recommendations."
    ),
    "Gemini (Google)": (
        "Based on my analysis, this is a moderately complex request. I can provide "
        "a detailed response drawing on broad knowledge while maintaining "
        "cost-efficiency compared to heavier models."
    ),
}
@dataclass
class RoutingDecision:
    """Outcome of routing one prompt to a provider (all values simulated)."""

    provider: str  # display name, e.g. "Ollama (local)"
    reason: str  # one-line human-readable justification
    latency_estimate_ms: int  # simulated latency estimate
    cost_estimate_usd: float  # simulated cost; 0.0 for the local provider
    decision_path: list[str]  # step-by-step trace rendered in the UI
    confidence: float  # 0-1 confidence, shown as a percentage
def _compute_complexity_score(prompt: str) -> tuple[float, list[str]]:
    """Score prompt complexity on a 0-1 scale with an explanation trail.

    The score is a weighted sum of four signals -- length (0.3),
    complex-keyword density (0.5), question count (0.15), and code
    markers (0.15) -- each capped at its weight before summing.

    Args:
        prompt: Raw user prompt text.

    Returns:
        Tuple of (score clamped to [0, 1], list of human-readable trail
        lines describing each factor's contribution).
    """
    trail: list[str] = []
    score = 0.0
    lowered = prompt.lower()  # hoisted: reused for every keyword test
    # Length factor -- saturates at 100 words.
    word_count = len(prompt.split())
    length_score = min(word_count / 100.0, 1.0) * 0.3
    score += length_score
    trail.append(f"Word count: {word_count} -> length factor: {length_score:.2f}")
    # Keyword factor -- scales with density of complex keywords (2+ saturates).
    keyword_hits = [kw for kw in COMPLEX_KEYWORDS if kw in lowered]
    keyword_score = min(len(keyword_hits) / 2.0, 1.0) * 0.5
    score += keyword_score
    if keyword_hits:
        trail.append(
            f"Complex keywords found: [{', '.join(keyword_hits)}] "
            f"-> keyword factor: {keyword_score:.2f}"
        )
    else:
        trail.append("No complex keywords -> keyword factor: 0.00")
    # Question depth (multiple questions imply complexity; 3+ saturates).
    question_count = prompt.count("?")
    question_score = min(question_count / 3.0, 1.0) * 0.15
    score += question_score
    trail.append(
        f"Question marks: {question_count} -> question factor: {question_score:.2f}"
    )
    # Code/technical markers; SQL keywords stay case-sensitive on purpose.
    code_markers = ["```", "def ", "class ", "function ", "SELECT ", "CREATE ", "import "]
    code_hits = [m for m in code_markers if m in prompt]
    code_score = min(len(code_hits) / 2.0, 1.0) * 0.15
    score += code_score
    if code_hits:
        trail.append(
            f"Code markers: [{', '.join(code_hits)}] -> code factor: {code_score:.2f}"
        )
    else:
        trail.append("No code markers -> code factor: 0.00")
    trail.append(f"Final complexity score: {score:.3f}")
    return min(score, 1.0), trail
def route_prompt(prompt: str, complexity_override: str) -> RoutingDecision:
    """Determine which provider should handle this prompt.

    Routing order: regex fast-path (unless overridden), complexity
    scoring, optional override clamping, then tier thresholds. The
    accumulated trace is returned for display in the UI.
    """
    path: list[str] = ["[1] Checking regex rules for trivial patterns..."]
    # An explicit override skips the regex fast-path entirely.
    if complexity_override in ("simple", "complex"):
        path.append(f" User override: complexity={complexity_override}")
    else:
        matched = next((p for p in SIMPLE_PATTERNS if p.search(prompt)), None)
        if matched is not None:
            path.append(f" Matched pattern: {matched.pattern}")
            path.append(" -> Route to Ollama (local, fast, free)")
            return RoutingDecision(
                provider="Ollama (local)",
                reason="Matched simple-pattern regex rule",
                latency_estimate_ms=random.randint(80, 250),
                cost_estimate_usd=0.0,
                decision_path=path,
                confidence=0.95,
            )
        path.append(" No regex match")
    # Heuristic complexity score plus its explanation trail.
    path.append("[2] Running complexity scorer...")
    score, trail = _compute_complexity_score(prompt)
    path.extend(f" {entry}" for entry in trail)
    # Overrides clamp the score into the matching tier.
    if complexity_override == "simple":
        score = min(score, 0.2)
        path.append(" Override applied: clamped score to <= 0.2")
    elif complexity_override == "complex":
        score = max(score, 0.7)
        path.append(" Override applied: raised score to >= 0.7")
    # Tier thresholds: local below 0.25, Gemini below 0.55, Claude above.
    OLLAMA_CEILING = 0.25
    GEMINI_CEILING = 0.55
    path.append(
        f"[3] Selecting provider (thresholds: "
        f"Ollama <{OLLAMA_CEILING}, Gemini <{GEMINI_CEILING}, Claude >={GEMINI_CEILING})..."
    )
    if score < OLLAMA_CEILING:
        provider = "Ollama (local)"
        latency = random.randint(80, 300)
        cost = 0.0
        path.append(f" Score {score:.3f} < {OLLAMA_CEILING} -> Ollama (local)")
    elif score < GEMINI_CEILING:
        provider = "Gemini (Google)"
        latency = random.randint(400, 1200)
        cost = round(random.uniform(0.001, 0.008), 4)
        path.append(
            f" Score {OLLAMA_CEILING} <= {score:.3f} < {GEMINI_CEILING} -> Gemini (Google)"
        )
    else:
        provider = "Claude (Anthropic)"
        latency = random.randint(800, 2500)
        cost = round(random.uniform(0.005, 0.025), 4)
        path.append(f" Score {score:.3f} >= {GEMINI_CEILING} -> Claude (Anthropic)")
    # Circuit breaker is simulated -- always healthy in the demo.
    path.append("[4] Circuit breaker check: all providers HEALTHY")
    path.append(f"[5] Final decision: {provider}")
    return RoutingDecision(
        provider=provider,
        reason=f"Complexity score {score:.3f} routed to {provider}",
        latency_estimate_ms=latency,
        cost_estimate_usd=cost,
        decision_path=path,
        confidence=round(0.7 + score * 0.25, 2),
    )
def handle_gateway_request(
    prompt: str, complexity: str
) -> tuple[str, str, str, str, str]:
    """Process a gateway routing request. Returns 5 strings for the UI outputs."""
    cleaned = (prompt or "").strip()
    if not cleaned:
        return "Enter a prompt above to see routing in action.", "", "", "", ""
    decision = route_prompt(cleaned, complexity.lower())
    # Provider badge
    provider_md = (
        f"**{decision.provider}**\n\n"
        f"Confidence: {decision.confidence:.0%}"
    )
    # Metrics
    metrics_md = (
        f"**Estimated Latency:** {decision.latency_estimate_ms} ms\n\n"
        f"**Estimated Cost:** ${decision.cost_estimate_usd:.4f}\n\n"
        f"**Reason:** {decision.reason}"
    )
    # Decision tree, rendered as a fenced code block
    tree_md = "```\n" + "\n".join(decision.decision_path) + "\n```"
    # Mock response
    response_md = (
        f"**[{decision.provider}]** (simulated)\n\n"
        f"{MOCK_RESPONSES[decision.provider]}"
    )
    # Simulated cache layer; key is derived from the raw (unstripped) prompt.
    digest = hashlib.md5(prompt.encode()).hexdigest()[:12]
    cache_md = (
        f"**Cache Key:** `{digest}`\n\n"
        f"**Cache Status:** MISS (first request)\n\n"
        f"**TTL:** 3600s\n\n"
        f"Subsequent identical prompts would return cached response "
        f"with <5ms latency and $0.00 cost."
    )
    return provider_md, metrics_md, tree_md, response_md, cache_md
# ---------------------------------------------------------------------------
# Tab 2: HNSW Vector Search
# ---------------------------------------------------------------------------
# In-memory demo corpus: 50 AI/ML sentences, embedded once at import time
# into _CORPUS_EMBEDDINGS below.
SENTENCES: list[str] = [
    "Neural networks learn hierarchical representations of data",
    "Transformers use self-attention mechanisms for sequence modeling",
    "Gradient descent optimizes model parameters iteratively",
    "Convolutional neural networks excel at image recognition tasks",
    "Recurrent neural networks process sequential data with memory",
    "Generative adversarial networks create realistic synthetic data",
    "Transfer learning reuses pretrained models for new tasks",
    "Reinforcement learning agents learn through reward signals",
    "Natural language processing enables machines to understand text",
    "Computer vision systems interpret and analyze visual information",
    "Embeddings represent discrete objects as continuous vectors",
    "Attention mechanisms allow models to focus on relevant inputs",
    "Backpropagation computes gradients through computational graphs",
    "Batch normalization stabilizes and accelerates neural network training",
    "Dropout regularization prevents overfitting in deep networks",
    "Learning rate scheduling improves convergence during training",
    "Data augmentation increases training set diversity artificially",
    "Hyperparameter tuning optimizes model configuration systematically",
    "Cross-validation estimates model performance on unseen data",
    "Feature engineering transforms raw data into informative representations",
    "Dimensionality reduction compresses high-dimensional data efficiently",
    "Clustering algorithms group similar data points together",
    "Decision trees split data based on feature thresholds",
    "Random forests combine multiple decision trees for robustness",
    "Support vector machines find optimal separating hyperplanes",
    "K-nearest neighbors classifies based on proximity in feature space",
    "Principal component analysis finds directions of maximum variance",
    "Autoencoders learn compressed representations through reconstruction",
    "Variational autoencoders generate new samples from learned distributions",
    "Graph neural networks operate on graph-structured data",
    "Federated learning trains models across decentralized data sources",
    "Differential privacy adds noise to protect individual data points",
    "Model distillation compresses large models into smaller ones",
    "Quantization reduces model size by lowering numerical precision",
    "Pruning removes unnecessary weights from neural networks",
    "ONNX provides a standard format for neural network interchange",
    "TensorRT optimizes models for inference on NVIDIA hardware",
    "Vector databases enable efficient similarity search at scale",
    "Retrieval augmented generation combines search with language models",
    "Prompt engineering designs effective inputs for language models",
    "Fine-tuning adapts pretrained models to specific domains",
    "RLHF aligns language models with human preferences",
    "Chain of thought prompting improves reasoning in language models",
    "Multi-modal models process text images and audio together",
    "Tokenization converts text into numerical sequences for models",
    "Beam search explores multiple decoding paths simultaneously",
    "Temperature scaling controls randomness in model generation",
    "Contrastive learning trains models by comparing positive and negative pairs",
    "Self-supervised learning extracts labels from the data itself",
    "Few-shot learning enables models to learn from minimal examples",
]
def _deterministic_embed(text: str, dim: int = 64) -> np.ndarray:
"""Generate a deterministic pseudo-embedding from text content.
Uses character-level hashing with trigram overlap to produce vectors
where semantically similar sentences get closer embeddings. This is
not a real language model -- it is a reproducible approximation that
makes the demo meaningful without any ML dependencies.
"""
rng = np.random.RandomState(
int(hashlib.sha256(text.lower().encode()).hexdigest(), 16) % (2**31)
)
base = rng.randn(dim).astype(np.float32)
# Add trigram-based signal so overlapping words produce closer vectors
words = text.lower().split()
for word in words:
word_seed = int(hashlib.md5(word.encode()).hexdigest(), 16) % (2**31)
word_rng = np.random.RandomState(word_seed)
base += word_rng.randn(dim).astype(np.float32) * 0.3
norm = np.linalg.norm(base)
if norm > 0:
base /= norm
return base
# Pre-compute corpus embeddings at module load so each search is a single
# matrix-vector product. Shape: (len(SENTENCES), 64); rows are unit-norm
# because _deterministic_embed normalizes its output.
_CORPUS_EMBEDDINGS: np.ndarray = np.stack(
    [_deterministic_embed(s) for s in SENTENCES]
)
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
dot = float(np.dot(a, b))
norm_a = float(np.linalg.norm(a))
norm_b = float(np.linalg.norm(b))
if norm_a == 0 or norm_b == 0:
return 0.0
return dot / (norm_a * norm_b)
def _search_vectors(
    query: str, top_k: int = 8
) -> list[tuple[str, float, int]]:
    """Search the corpus and return (sentence, similarity, index) tuples."""
    # Rows and query are produced by _deterministic_embed (unit-norm), so
    # the dot product is the cosine similarity.
    scores = _CORPUS_EMBEDDINGS @ _deterministic_embed(query)
    best_first = np.argsort(scores)[::-1]
    return [
        (SENTENCES[int(i)], float(scores[i]), int(i))
        for i in best_first[:top_k]
    ]
def _build_vector_plot(
    query: str, results: list[tuple[str, float, int]]
) -> plt.Figure:
    """Build a 2D PCA-like projection of the vector space highlighting results.

    Args:
        query: The search text (re-embedded here for the query marker).
        results: Output of _search_vectors -- (sentence, similarity, index).

    Returns:
        A dark-themed matplotlib Figure: grey corpus points, blue result
        points sized/faded by similarity, and a red star for the query.
    """
    # Simple 2D projection using first two principal-ish components
    # (deterministic random projection for consistency)
    rng = np.random.RandomState(42)
    projection_matrix = rng.randn(64, 2).astype(np.float32)
    # Normalize each projection column so both axes have comparable scale.
    projection_matrix /= np.linalg.norm(projection_matrix, axis=0, keepdims=True)
    all_2d = _CORPUS_EMBEDDINGS @ projection_matrix
    query_vec = _deterministic_embed(query)
    query_2d = query_vec @ projection_matrix
    result_indices = {r[2] for r in results}
    fig, ax = plt.subplots(figsize=(8, 6))
    fig.patch.set_facecolor(DARK_BG)
    ax.set_facecolor(DARK_SURFACE)
    # Plot all corpus points (non-results only, faded grey)
    non_result_mask = np.array(
        [i not in result_indices for i in range(len(SENTENCES))]
    )
    ax.scatter(
        all_2d[non_result_mask, 0],
        all_2d[non_result_mask, 1],
        c=TEXT_SECONDARY,
        alpha=0.3,
        s=20,
        label="Corpus",
    )
    # Plot result points; higher similarity -> larger and more opaque marker.
    for sentence, sim, idx in results:
        ax.scatter(
            all_2d[idx, 0],
            all_2d[idx, 1],
            c=ACCENT_BLUE,
            alpha=max(0.4, sim),
            s=60 + sim * 80,
            zorder=5,
        )
        # Draw line from query to result
        ax.plot(
            [query_2d[0], all_2d[idx, 0]],
            [query_2d[1], all_2d[idx, 1]],
            color=ACCENT_BLUE,
            alpha=0.15 + sim * 0.3,
            linewidth=0.8,
        )
    # Plot query point (red star drawn on top of everything else)
    ax.scatter(
        query_2d[0],
        query_2d[1],
        c=ACCENT_RED,
        s=120,
        marker="*",
        zorder=10,
        label="Query",
    )
    ax.set_title(
        "Vector Space Projection (2D)",
        color=TEXT_PRIMARY,
        fontsize=13,
        fontweight="bold",
        pad=12,
    )
    ax.tick_params(colors=TEXT_SECONDARY, labelsize=8)
    for spine in ax.spines.values():
        spine.set_color(DARK_BORDER)
    ax.legend(
        facecolor=DARK_SURFACE,
        edgecolor=DARK_BORDER,
        labelcolor=TEXT_PRIMARY,
        fontsize=9,
    )
    fig.tight_layout()
    return fig
def handle_vector_search(query: str) -> tuple[str, Any]:
    """Run vector search and return results markdown + plot."""
    cleaned = (query or "").strip()
    if not cleaned:
        return "Enter a query to search the vector corpus.", None
    t0 = time.perf_counter()
    hits = _search_vectors(cleaned)
    elapsed_us = (time.perf_counter() - t0) * 1_000_000
    # Assemble the markdown results table.
    md: list[str] = [
        f"**Query:** {cleaned}\n",
        f"**Search time:** {elapsed_us:.0f} us | "
        f"**Corpus size:** {len(SENTENCES)} sentences | "
        f"**Embedding dim:** 64\n",
        "---\n",
        "| Rank | Similarity | Sentence |",
        "|------|-----------|----------|",
    ]
    for rank, (sentence, sim, _idx) in enumerate(hits, 1):
        # ASCII similarity bar: filled "+" segment padded with "-".
        filled = int(sim * 20)
        bar = "+" * filled + "-" * (20 - filled)
        md.append(f"| {rank} | `{sim:.4f}` [{bar}] | {sentence} |")
    md.append("\n---\n")
    md.append(
        "*Embeddings are deterministic pseudo-vectors (trigram hashing). "
        "In production, Citadel uses real transformer embeddings with "
        "a custom HNSW index supporting millions of vectors.*"
    )
    return "\n".join(md), _build_vector_plot(cleaned, hits)
# ---------------------------------------------------------------------------
# Tab 3: ReAct Agent Runtime
# ---------------------------------------------------------------------------
# Canned Thought/Action/Observation scripts, one list of steps per scenario
# type. handle_agent_task substitutes the literal "<TOPIC>" with the user's
# task text; a step whose action is "FINAL_ANSWER" terminates the loop.
AGENT_SCENARIOS: dict[str, list[dict[str, str]]] = {
    # Default scenario: knowledge-base search followed by a web refresh.
    "research": [
        {
            "thought": (
                "The user wants me to research a topic. I should start by "
                "searching for relevant information, then synthesize findings."
            ),
            "action": "search_knowledge_base",
            "action_input": '{"query": "<TOPIC>", "top_k": 5}',
            "observation": (
                "Found 5 relevant documents:\n"
                " [1] Overview and fundamentals (relevance: 0.92)\n"
                " [2] Recent developments in 2025 (relevance: 0.87)\n"
                " [3] Comparison with alternative approaches (relevance: 0.81)\n"
                " [4] Implementation best practices (relevance: 0.76)\n"
                " [5] Common pitfalls and solutions (relevance: 0.71)"
            ),
        },
        {
            "thought": (
                "I have foundational documents. Let me check for any recent "
                "updates or corrections to ensure accuracy."
            ),
            "action": "web_search",
            "action_input": '{"query": "<TOPIC> latest developments 2026"}',
            "observation": (
                "3 recent results found:\n"
                " [1] New benchmark results published March 2026\n"
                " [2] Industry adoption report Q1 2026\n"
                " [3] Critical vulnerability patch released Feb 2026"
            ),
        },
        {
            "thought": (
                "I now have both historical context and recent updates. "
                "I can synthesize a comprehensive answer. The vulnerability "
                "patch in result [3] is important context to include."
            ),
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
    # Coding scenario: read source, read tests, write the change.
    "code": [
        {
            "thought": (
                "The user needs code assistance. I should first understand "
                "the codebase structure before making changes."
            ),
            "action": "read_file",
            "action_input": '{"path": "src/main.py"}',
            "observation": (
                "File contents (247 lines):\n"
                " - Class: ApplicationServer (lines 12-180)\n"
                " - Function: configure_routes (lines 182-220)\n"
                " - Function: main (lines 222-247)\n"
                " Dependencies: fastapi, uvicorn, pydantic"
            ),
        },
        {
            "thought": (
                "I see the structure. The ApplicationServer class handles "
                "routing. I need to check the test file to understand "
                "expected behavior before modifying anything."
            ),
            "action": "read_file",
            "action_input": '{"path": "tests/test_main.py"}',
            "observation": (
                "Test file (89 lines):\n"
                " - test_server_startup: verifies port binding\n"
                " - test_health_endpoint: checks /health returns 200\n"
                " - test_graceful_shutdown: ensures cleanup on SIGTERM\n"
                " All 3 tests currently passing"
            ),
        },
        {
            "thought": (
                "I understand the codebase and test expectations. I can now "
                "implement the requested change while maintaining backward "
                "compatibility with existing tests."
            ),
            "action": "write_file",
            "action_input": '{"path": "src/main.py", "changes": "..."}',
            "observation": "File updated successfully. Running tests...\n 3/3 tests passing. No regressions.",
        },
        {
            "thought": "The change is implemented and all tests pass. I can provide the final answer with a summary of what changed.",
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
    # Analytics scenario: inspect schema, then aggregate by provider.
    "analysis": [
        {
            "thought": (
                "The user wants data analysis. I should first retrieve the "
                "relevant dataset and understand its schema."
            ),
            "action": "query_database",
            "action_input": '{"sql": "SELECT column_name, data_type FROM information_schema.columns WHERE table_name = \'metrics\'"}',
            "observation": (
                "Schema for 'metrics' table:\n"
                " - timestamp: DATETIME (indexed)\n"
                " - provider: VARCHAR(50)\n"
                " - latency_ms: FLOAT\n"
                " - tokens_used: INTEGER\n"
                " - cost_usd: DECIMAL(10,6)\n"
                " - status_code: INTEGER\n"
                " Total rows: 142,847"
            ),
        },
        {
            "thought": (
                "Good schema understanding. Now I need to run the analytical "
                "query the user is interested in. Let me aggregate by provider."
            ),
            "action": "query_database",
            "action_input": (
                '{"sql": "SELECT provider, COUNT(*) as requests, '
                "AVG(latency_ms) as avg_latency, SUM(cost_usd) as total_cost "
                "FROM metrics WHERE timestamp > '2026-02-01' "
                'GROUP BY provider ORDER BY requests DESC"}'
            ),
            "observation": (
                "Results:\n"
                " | Provider | Requests | Avg Latency | Total Cost |\n"
                " |----------|----------|-------------|------------|\n"
                " | Ollama | 89,234 | 142ms | $0.00 |\n"
                " | Gemini | 38,991 | 687ms | $12.47 |\n"
                " | Claude | 14,622 | 1,284ms | $48.93 |"
            ),
        },
        {
            "thought": (
                "Clear picture: Ollama handles the bulk of requests at zero "
                "cost, Gemini is the middle tier, and Claude handles complex "
                "queries at higher cost but lower volume. The routing strategy "
                "is working as designed. I can now present findings."
            ),
            "action": "FINAL_ANSWER",
            "action_input": "",
            "observation": "",
        },
    ],
}
def _select_scenario(task: str) -> tuple[str, list[dict[str, str]]]:
    """Pick the most relevant scenario based on task keywords."""
    lowered = task.lower()
    # Keyword buckets checked in priority order; "research" is the fallback.
    code_keywords = ("code", "implement", "fix", "bug", "refactor", "function")
    data_keywords = ("data", "analyz", "metric", "query", "report", "stats")
    if any(kw in lowered for kw in code_keywords):
        return "code", AGENT_SCENARIOS["code"]
    if any(kw in lowered for kw in data_keywords):
        return "analysis", AGENT_SCENARIOS["analysis"]
    return "research", AGENT_SCENARIOS["research"]
def handle_agent_task(task: str) -> str:
    """Generate a ReAct agent trace for the given task.

    Args:
        task: Free-text task description; keywords choose the scenario.

    Returns:
        Markdown rendering of the Thought/Action/Observation loop, or a
        usage hint when the task is blank.
    """
    if not task or not task.strip():
        return "Enter a task above to see the ReAct agent reasoning loop."
    scenario_type, steps = _select_scenario(task.strip())
    topic = task.strip()
    # Fix: removed the pointless f-prefixes on placeholder-free literals.
    lines: list[str] = [
        "## ReAct Agent Trace\n",
        f"**Task:** {topic}\n",
        f"**Scenario type:** {scenario_type}\n",
        "**Registered tools:** search_knowledge_base, web_search, read_file, "
        "write_file, query_database, execute_code\n",
        "---\n",
    ]
    for step_num, step in enumerate(steps, 1):
        # "<TOPIC>" is a placeholder baked into the canned scenario text.
        thought = step["thought"].replace("<TOPIC>", topic)
        action = step["action"]
        action_input = step["action_input"].replace("<TOPIC>", topic)
        observation = step["observation"].replace("<TOPIC>", topic)
        lines.append(f"### Step {step_num}\n")
        lines.append(f"**Thought:** {thought}\n")
        if action == "FINAL_ANSWER":
            # Terminal step: summarize instead of showing a tool call.
            lines.append("**Action:** `FINAL_ANSWER`\n")
            lines.append(
                f"**Result:** Based on the information gathered across "
                f"{step_num - 1} tool invocations, I have synthesized a "
                f"comprehensive response to the user's request regarding "
                f"*{topic}*.\n"
            )
        else:
            lines.append(f"**Action:** `{action}({action_input})`\n")
            lines.append(f"**Observation:**\n```\n{observation}\n```\n")
        lines.append("---\n")
    lines.append(
        f"**Agent completed in {len(steps)} steps "
        f"({len(steps) - 1} tool calls + final answer)**\n\n"
        f"*In production, Citadel's agent runtime executes real tool calls "
        f"with timeout handling, retry logic, and full observability tracing.*"
    )
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Tab 4: Observability Dashboard
# ---------------------------------------------------------------------------
def _generate_timeseries(
hours: int = 24,
base_rate: float = 50.0,
noise: float = 15.0,
trend: float = 0.5,
) -> tuple[list[str], list[float]]:
"""Generate realistic-looking time series data."""
rng = random.Random(42)
timestamps = []
values = []
for h in range(hours):
# Simulate daily pattern: lower at night, higher during day
hour_of_day = h % 24
daily_factor = 0.5 + 0.5 * math.sin((hour_of_day - 6) * math.pi / 12)
value = base_rate * daily_factor + trend * h + rng.gauss(0, noise)
timestamps.append(f"{h:02d}:00")
values.append(max(0, value))
return timestamps, values
def build_observability_dashboard() -> tuple[Any, str]:
    """Build the observability charts and metrics summary.

    Returns:
        Tuple of (plotly Figure with four subplots, markdown summary).
        All numbers are synthetic and seeded, so every call -- including
        the "Refresh" button -- renders the identical dashboard.
    """
    rng = random.Random(42)  # fixed seed -> reproducible "telemetry"
    # Generate data
    hours = 24
    timestamps, rps_values = _generate_timeseries(hours, 45, 12, 0.3)
    # p50 wobbles around ~80ms; p95 is modeled as roughly 2.5x p50 + jitter.
    p50_latencies = [80 + rng.gauss(0, 15) + 20 * math.sin(i * 0.3) for i in range(hours)]
    p95_latencies = [lat * (2.5 + rng.gauss(0, 0.3)) for lat in p50_latencies]
    # Hard-coded 24h totals per provider (requests / dollars / error counts).
    provider_requests = {"Ollama": 62_340, "Gemini": 27_891, "Claude": 10_244}
    provider_costs = {"Ollama": 0.0, "Gemini": 8.94, "Claude": 34.21}
    provider_errors = {"Ollama": 12, "Gemini": 47, "Claude": 8}
    token_usage = {
        "Prompt tokens": 2_847_291,
        "Completion tokens": 1_423_886,
        "Cached tokens": 891_204,
    }
    # Build plotly figure with subplots
    # Bottom-right cell is "domain" type to support Pie chart
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=(
            "Requests per Second (24h)",
            "Latency Distribution (p50 / p95)",
            "Cost by Provider",
            "Token Usage Breakdown",
        ),
        specs=[
            [{"type": "xy"}, {"type": "xy"}],
            [{"type": "xy"}, {"type": "domain"}],
        ],
        vertical_spacing=0.14,
        horizontal_spacing=0.10,
    )
    # Chart 1: RPS time series (area fill down to zero)
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=rps_values,
            mode="lines",
            name="req/s",
            line=dict(color=ACCENT_BLUE, width=2),
            fill="tozeroy",
            fillcolor="rgba(88, 166, 255, 0.1)",
        ),
        row=1,
        col=1,
    )
    # Chart 2: Latency (two overlaid lines: p50 and p95)
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=p50_latencies,
            mode="lines",
            name="p50",
            line=dict(color=ACCENT_GREEN, width=2),
        ),
        row=1,
        col=2,
    )
    fig.add_trace(
        go.Scatter(
            x=timestamps,
            y=p95_latencies,
            mode="lines",
            name="p95",
            line=dict(color=ACCENT_ORANGE, width=2),
        ),
        row=1,
        col=2,
    )
    # Chart 3: Cost by provider (bar)
    providers = list(provider_costs.keys())
    costs = list(provider_costs.values())
    colors = [ACCENT_GREEN, ACCENT_ORANGE, ACCENT_BLUE]
    fig.add_trace(
        go.Bar(
            x=providers,
            y=costs,
            name="Cost ($)",
            marker_color=colors,
            text=[f"${c:.2f}" for c in costs],
            textposition="outside",
            textfont=dict(color=TEXT_PRIMARY),
        ),
        row=2,
        col=1,
    )
    # Chart 4: Token usage (donut pie)
    fig.add_trace(
        go.Pie(
            labels=list(token_usage.keys()),
            values=list(token_usage.values()),
            marker=dict(colors=[ACCENT_BLUE, ACCENT_GREEN, ACCENT_ORANGE]),
            textinfo="label+percent",
            textfont=dict(color=TEXT_PRIMARY, size=11),
            hole=0.4,
        ),
        row=2,
        col=2,
    )
    # Style: apply the shared dark palette to the whole figure.
    fig.update_layout(
        height=620,
        paper_bgcolor=DARK_BG,
        plot_bgcolor=DARK_SURFACE,
        font=dict(color=TEXT_PRIMARY, size=11),
        showlegend=True,
        legend=dict(
            bgcolor=DARK_SURFACE,
            bordercolor=DARK_BORDER,
            font=dict(color=TEXT_PRIMARY),
        ),
        margin=dict(t=40, b=30, l=50, r=30),
    )
    for annotation in fig.layout.annotations:
        annotation.font = dict(color=TEXT_PRIMARY, size=12)
    # Style only the XY subplot axes (bottom-right is domain type, no axes)
    for axis_name in ["xaxis", "xaxis2", "xaxis3"]:
        fig.layout[axis_name].gridcolor = DARK_BORDER
        fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY)
    for axis_name in ["yaxis", "yaxis2", "yaxis3"]:
        fig.layout[axis_name].gridcolor = DARK_BORDER
        fig.layout[axis_name].tickfont = dict(color=TEXT_SECONDARY)
    # Metrics summary (markdown tables rendered below the charts)
    total_requests = sum(provider_requests.values())
    total_cost = sum(provider_costs.values())
    total_errors = sum(provider_errors.values())
    error_rate = total_errors / total_requests * 100
    avg_p50 = sum(p50_latencies) / len(p50_latencies)
    avg_p95 = sum(p95_latencies) / len(p95_latencies)
    summary_lines = [
        "## Summary Metrics (24h window)\n",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Total requests | {total_requests:,} |",
        f"| Avg requests/sec | {total_requests / 86400:.1f} |",
        f"| p50 latency | {avg_p50:.0f} ms |",
        f"| p95 latency | {avg_p95:.0f} ms |",
        f"| Total cost | ${total_cost:.2f} |",
        f"| Cost per request | ${total_cost / total_requests:.6f} |",
        f"| Total errors | {total_errors:,} |",
        f"| Error rate | {error_rate:.3f}% |",
        f"| Total tokens | {sum(token_usage.values()):,} |",
        f"| Cache hit rate | {token_usage['Cached tokens'] / sum(token_usage.values()) * 100:.1f}% |",
        "\n---\n",
        "### Provider Breakdown\n",
        "| Provider | Requests | Cost | Errors | Error Rate |",
        "|----------|----------|------|--------|------------|",
    ]
    for provider in providers:
        req = provider_requests[provider]
        cost = provider_costs[provider]
        err = provider_errors[provider]
        erate = err / req * 100
        summary_lines.append(
            f"| {provider} | {req:,} | ${cost:.2f} | {err} | {erate:.3f}% |"
        )
    summary_lines.append(
        "\n*Metrics are simulated for demonstration. In production, "
        "citadel-observe collects real telemetry via OpenTelemetry-compatible "
        "exporters with Prometheus/Grafana integration.*"
    )
    return fig, "\n".join(summary_lines)
# ---------------------------------------------------------------------------
# Gradio Application
# ---------------------------------------------------------------------------
def build_app() -> gr.Blocks:
    """Construct the Gradio Blocks application.

    Four tabs -- gateway routing, vector search, agent trace, and the
    observability dashboard -- all backed by the mock handlers above;
    no external services are contacted.
    """
    with gr.Blocks(
        title="Citadel -- AI Operations Platform",
        # Dark GitHub-style theme; .set() pins light and dark variants to the
        # same palette so the page looks identical in both modes.
        theme=gr.themes.Base(
            primary_hue=gr.themes.colors.blue,
            secondary_hue=gr.themes.colors.gray,
            neutral_hue=gr.themes.colors.gray,
            font=gr.themes.GoogleFont("Inter"),
            font_mono=gr.themes.GoogleFont("JetBrains Mono"),
        ).set(
            body_background_fill=DARK_BG,
            body_background_fill_dark=DARK_BG,
            block_background_fill=DARK_SURFACE,
            block_background_fill_dark=DARK_SURFACE,
            block_border_color=DARK_BORDER,
            block_border_color_dark=DARK_BORDER,
            input_background_fill="#0d1117",
            input_background_fill_dark="#0d1117",
            button_primary_background_fill=ACCENT_BLUE,
            button_primary_background_fill_dark=ACCENT_BLUE,
            button_primary_text_color="#ffffff",
            button_primary_text_color_dark="#ffffff",
        ),
        css=CUSTOM_CSS,
    ) as app:
        gr.Markdown(
            f"""
# Citadel -- AI Operations Platform
Production-grade AI infrastructure built from first principles.
LLM gateway | Vector search | Agent runtime | Observability
[GitHub](https://github.com/dbhavery/citadel)
""",
        )
        # Tab 1: LLM Gateway
        with gr.Tab("LLM Gateway"):
            gr.Markdown(
                "### Multi-Provider Routing Engine\n"
                "Enter a prompt and see how Citadel's gateway routes it to the "
                "optimal provider based on complexity analysis, regex rules, "
                "and cost/latency trade-offs."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    gateway_input = gr.Textbox(
                        label="Prompt",
                        placeholder="Try: 'Hello' (simple) or 'Analyze the security implications of...' (complex)",
                        lines=3,
                    )
                with gr.Column(scale=1):
                    # NOTE(review): handle_gateway_request lowercases this
                    # value and route_prompt only reacts to "simple" /
                    # "complex", so "Moderate" behaves exactly like "Auto"
                    # -- confirm that is intended.
                    complexity_selector = gr.Radio(
                        choices=["Auto", "Simple", "Moderate", "Complex"],
                        value="Auto",
                        label="Complexity Override",
                    )
                    gateway_btn = gr.Button("Route Request", variant="primary")
            with gr.Row():
                provider_output = gr.Markdown(label="Selected Provider")
                metrics_output = gr.Markdown(label="Routing Metrics")
                cache_output = gr.Markdown(label="Cache Layer")
            with gr.Row():
                with gr.Column(scale=1):
                    tree_output = gr.Markdown(label="Decision Path")
                with gr.Column(scale=1):
                    response_output = gr.Markdown(label="Mock Response")
            gateway_btn.click(
                fn=handle_gateway_request,
                inputs=[gateway_input, complexity_selector],
                outputs=[
                    provider_output,
                    metrics_output,
                    tree_output,
                    response_output,
                    cache_output,
                ],
            )
            # Also trigger on Enter
            gateway_input.submit(
                fn=handle_gateway_request,
                inputs=[gateway_input, complexity_selector],
                outputs=[
                    provider_output,
                    metrics_output,
                    tree_output,
                    response_output,
                    cache_output,
                ],
            )
        # Tab 2: Vector Search
        with gr.Tab("HNSW Vector Search"):
            gr.Markdown(
                "### Nearest Neighbor Search\n"
                "Search a corpus of 50 AI/ML sentences using cosine similarity. "
                "The HNSW index in production supports millions of vectors with "
                "sub-millisecond lookup. This demo uses a simplified "
                "embedding model for illustration."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    vector_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Try: 'how do transformers work' or 'reducing model size'",
                        lines=1,
                    )
                with gr.Column(scale=1):
                    vector_btn = gr.Button("Search Vectors", variant="primary")
            vector_results = gr.Markdown(label="Search Results")
            vector_plot = gr.Plot(label="Vector Space Visualization")
            vector_btn.click(
                fn=handle_vector_search,
                inputs=[vector_input],
                outputs=[vector_results, vector_plot],
            )
            vector_input.submit(
                fn=handle_vector_search,
                inputs=[vector_input],
                outputs=[vector_results, vector_plot],
            )
        # Tab 3: Agent Runtime
        with gr.Tab("Agent Runtime"):
            gr.Markdown(
                "### ReAct Agent Reasoning Loop\n"
                "Enter a task and see how Citadel's agent runtime decomposes "
                "it into a Thought-Action-Observation cycle. The agent selects "
                "tools, processes results, and builds toward a final answer."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    agent_input = gr.Textbox(
                        label="Task",
                        placeholder="Try: 'Research vector databases' or 'Fix the login bug' or 'Analyze API latency trends'",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    agent_btn = gr.Button("Run Agent", variant="primary")
            agent_output = gr.Markdown(
                label="Agent Trace",
                elem_classes=["agent-trace"],
            )
            agent_btn.click(
                fn=handle_agent_task,
                inputs=[agent_input],
                outputs=[agent_output],
            )
            agent_input.submit(
                fn=handle_agent_task,
                inputs=[agent_input],
                outputs=[agent_output],
            )
        # Tab 4: Observability
        with gr.Tab("Observability"):
            gr.Markdown(
                "### Operations Dashboard\n"
                "Real-time monitoring of the Citadel platform. Request rates, "
                "latency percentiles, cost tracking, and token usage -- "
                "everything you need to operate an AI system in production."
            )
            refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
            obs_plot = gr.Plot(label="Dashboard Charts")
            obs_summary = gr.Markdown(label="Metrics Summary")
            # Load on page open
            app.load(
                fn=build_observability_dashboard,
                outputs=[obs_plot, obs_summary],
            )
            refresh_btn.click(
                fn=build_observability_dashboard,
                outputs=[obs_plot, obs_summary],
            )
    return app
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    application = build_app()
    # 0.0.0.0 binds all interfaces (needed inside containers / HF Spaces);
    # 7860 is the conventional Gradio port.
    application.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )