"""
๐ฅ The Forge โ Public CAPT Fleet Controller
A mind-blowing demo of real multi-brain AI cognition.
SECURITY NOTES:
- OPENROUTER_API_KEY must be set as HF Space secret only
- All errors sanitized before reaching UI
- Free models enforced server-side
- No IP addresses or internal URLs exposed
"""
from __future__ import annotations
import json
import os
import time
import gradio as gr
import requests
from fleet_controller import (
BRAINS, HF_SPACES, ROUTING_KEYWORDS, MODEL_TIERS, TIER_LABELS,
FREE_MODEL_IDS, FREE_MODEL_IDS as _FREE_MODEL_IDS,
route_query, route_with_fallback, call_brain, call_all_brains,
check_brain_health, check_all_brains,
check_space_health, check_all_spaces,
get_all_models, is_free_model, sanitize_error,
)
OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY", "")


# ─── Public-safe OpenRouter caller ───
def call_openrouter(
    prompt: str,
    model: str = "deepseek/deepseek-chat-v3-0324",
    system: str = "",
    temperature: float = 0.7,
    max_tokens: int = 2048,
) -> dict:
    """Send one single-turn chat completion to OpenRouter.

    Args:
        prompt: User message text.
        model: OpenRouter model id; rejected unless ``is_free_model`` accepts it.
        system: System prompt; a default persona is used when empty.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token cap forwarded to the API.

    Returns:
        On success, a dict with ``content``, ``model``, ``input_tokens`` and
        ``output_tokens``. On failure, ``{"error": message}``. Errors raised
        by the HTTP call or while reading the response are returned this way
        rather than propagated, and provider/exception text is passed through
        ``sanitize_error`` first.
    """
    if not OPENROUTER_KEY:
        return {"error": "OPENROUTER_API_KEY not configured — contact admin"}
    # Server-side enforcement: free models only
    if not is_free_model(model):
        return {"error": f"Model '{model}' is not in the free tier. Only free models are available in this demo."}
    try:
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://capt.dev",
                "X-Title": "CAPT-TheForge-Public",
            },
            json={
                "model": model,
                "messages": [
                    {"role": "system", "content": system or "You are CAPT Forge — a public demo of multi-brain AI cognition."},
                    {"role": "user", "content": prompt},
                ],
                "temperature": temperature,
                "max_tokens": max_tokens,
            },
            timeout=60,
        )
        try:
            data = r.json()
        except ValueError:
            # Error bodies are not guaranteed to be JSON (e.g. HTML from a
            # gateway); fall through to the status/text based message below.
            data = {}
        if not isinstance(data, dict):
            data = {}
        api_error = data.get("error")
        # A failure can also be reported inside a 200 body, so check both.
        if r.status_code != 200 or api_error:
            if isinstance(api_error, dict):
                err_msg = api_error.get("message")
            else:
                err_msg = api_error
            err_msg = err_msg or r.text[:200] or f"HTTP {r.status_code}"
            return {"error": sanitize_error(str(err_msg))}
        choices = data.get("choices") or []
        if not choices:
            return {"error": "OpenRouter returned an empty response"}
        message = choices[0].get("message") or {}
        usage = data.get("usage") or {}
        return {
            # content may be null in the payload; callers expect a string.
            "content": message.get("content") or "",
            "model": data.get("model", model),
            "input_tokens": usage.get("prompt_tokens", 0),
            "output_tokens": usage.get("completion_tokens", 0),
        }
    except requests.exceptions.Timeout:
        return {"error": "OpenRouter timed out — try again in a moment"}
    except Exception as e:
        # UI boundary: nothing unsanitized may reach the caller.
        return {"error": sanitize_error(e)}
# ───────────────────────────────────────────────────────────────────
# CUSTOM CSS — Dark cinematic theme with glow effects
# ───────────────────────────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...) when the app is constructed.
# Contents, in order:
#   - Google Fonts import for Inter and JetBrains Mono
#   - --forge-* colour variables and the page background/font
#   - gradient + glow styling for .gr-button-primary
#   - .pipeline-module base pill, .pipeline-active / .pipeline-inactive
#     states, and one colour class per entry in PIPELINE_MODULES
#   - .status-pulse dots with the pulse-glow keyframes
#   - .json-output monospace blocks and the .free-badge pill
FORGE_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
:root {
--forge-bg: #0a0a0f;
--forge-panel: #111118;
--forge-border: #1e1e2e;
--forge-cyan: #00d4ff;
--forge-purple: #a855f7;
--forge-green: #4ade80;
--forge-gold: #fbbf24;
--forge-red: #ef4444;
}
body {
font-family: 'Inter', system-ui, sans-serif !important;
background: var(--forge-bg) !important;
}
/* Glowing borders on active elements */
.gr-button-primary {
background: linear-gradient(135deg, #00d4ff 0%, #a855f7 100%) !important;
border: none !important;
box-shadow: 0 0 20px rgba(0, 212, 255, 0.3) !important;
transition: all 0.3s ease !important;
}
.gr-button-primary:hover {
box-shadow: 0 0 30px rgba(0, 212, 255, 0.5) !important;
transform: translateY(-1px) !important;
}
/* Pipeline module styles */
.pipeline-module {
display: inline-block;
padding: 6px 14px;
margin: 3px;
border-radius: 20px;
font-size: 12px;
font-weight: 500;
font-family: 'JetBrains Mono', monospace;
border: 1px solid;
transition: all 0.5s ease;
}
.pipeline-inactive {
opacity: 0.3;
background: transparent;
color: #666;
border-color: #333;
}
.pipeline-active {
opacity: 1;
box-shadow: 0 0 15px currentColor;
}
.pipeline-pulse { color: #a855f7; background: rgba(168,85,247,0.15); border-color: #a855f7; }
.pipeline-neda { color: #3b82f6; background: rgba(59,130,246,0.15); border-color: #3b82f6; }
.pipeline-hmc { color: #06b6d4; background: rgba(6,182,212,0.15); border-color: #06b6d4; }
.pipeline-cig { color: #f59e0b; background: rgba(245,158,11,0.15); border-color: #f59e0b; }
.pipeline-hdr { color: #ec4899; background: rgba(236,72,153,0.15); border-color: #ec4899; }
.pipeline-qipc { color: #fbbf24; background: rgba(251,191,36,0.15); border-color: #fbbf24; }
.pipeline-echo { color: #4ade80; background: rgba(74,222,128,0.15); border-color: #4ade80; }
.pipeline-meta { color: #94a3b8; background: rgba(148,163,184,0.15); border-color: #94a3b8; }
.pipeline-immu { color: #22c55e; background: rgba(34,197,94,0.15); border-color: #22c55e; }
.pipeline-nds { color: #00d4ff; background: rgba(0,212,255,0.15); border-color: #00d4ff; }
/* Status indicators */
.status-pulse {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 50%;
animation: pulse-glow 2s infinite;
}
.status-online { background: #4ade80; box-shadow: 0 0 8px #4ade80; }
.status-offline { background: #ef4444; box-shadow: 0 0 8px #ef4444; }
@keyframes pulse-glow {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
/* Monospace JSON blocks */
.json-output {
font-family: 'JetBrains Mono', monospace !important;
font-size: 13px !important;
}
/* Free badge */
.free-badge {
display: inline-block;
padding: 2px 8px;
background: rgba(74,222,128,0.2);
color: #4ade80;
border: 1px solid rgba(74,222,128,0.4);
border-radius: 12px;
font-size: 11px;
font-weight: 600;
margin-left: 8px;
}
"""
# ───────────────────────────────────────────────────────────────────
# PIPELINE VISUALIZATION
# ───────────────────────────────────────────────────────────────────
# (module name, human-readable role, CSS colour class from FORGE_CSS)
PIPELINE_MODULES = [
    ("PULSE", "Language Processing", "pipeline-pulse"),
    ("NEDA", "Pattern Recognition", "pipeline-neda"),
    ("HMC", "Memory Encoding", "pipeline-hmc"),
    ("CIG", "Causal Inference", "pipeline-cig"),
    ("HDR", "Hyperdimensional Reasoning", "pipeline-hdr"),
    ("QIPC", "Quantum Consensus", "pipeline-qipc"),
    ("ECHO", "Episodic Memory", "pipeline-echo"),
    ("META", "Self-Monitoring", "pipeline-meta"),
    ("IMMU", "Constitutional Guard", "pipeline-immu"),
    ("NDS", "Decision Synthesis", "pipeline-nds"),
]


def render_pipeline_html(active_modules: list[str] | None = None) -> str:
    """Render the cognitive pipeline as HTML with active modules glowing.

    Args:
        active_modules: Names of modules to mark active. Matching against
            ``PIPELINE_MODULES`` is case-insensitive; entries that are not
            strings are ignored. ``None`` or empty renders every module
            inactive.

    Returns:
        One ``<span>`` per module, in ``PIPELINE_MODULES`` order, joined by
        right-arrow entities. Each span carries ``pipeline-module``, the
        module's colour class, and either ``pipeline-active`` or
        ``pipeline-inactive``; the role description is the tooltip.
    """
    active = {m.upper() for m in (active_modules or []) if isinstance(m, str)}
    # HTML entity rather than a literal arrow so the output is pure ASCII.
    arrows = " &rarr; "
    parts = []
    for mod, desc, cls in PIPELINE_MODULES:
        state = "pipeline-active" if mod in active else "pipeline-inactive"
        parts.append(
            f'<span class="pipeline-module {cls} {state}" title="{desc}">{mod}</span>'
        )
    return f"""
{arrows.join(parts)}
"""
# ───────────────────────────────────────────────────────────────────
# TAB BUILDERS
# ───────────────────────────────────────────────────────────────────
def build_fleet_dashboard():
    """Build the Fleet tab: brain and Space health tables plus a load gauge.

    Must run inside the module-level ``gr.Blocks`` context after ``demo`` is
    bound, because the initial refresh is registered through ``demo.load``.
    """
    with gr.Tab("🎛️ Fleet"):
        gr.Markdown("## CAPT Fleet Status\nReal-time health of the cognitive architecture.")
        with gr.Row():
            refresh_btn = gr.Button("🔄 Refresh", variant="primary")
            # NOTE(review): no event below reads this checkbox, so ticking it
            # currently has no effect.
            auto_refresh = gr.Checkbox(label="Auto-refresh (30s)", value=False)
        gr.Markdown("### 🧠 Brain Health")
        brain_table = gr.Dataframe(
            headers=["", "Brain", "Status", "Latency", "Load"],
            datatype=["str", "str", "str", "str", "str"],
            interactive=False,
            wrap=True,
        )
        gr.Markdown("### 🚀 Space Fleet")
        space_table = gr.Dataframe(
            headers=["Space", "Status", "HTTP", "Latency", "Note"],
            datatype=["str", "str", "str", "str", "str"],
            interactive=False,
            wrap=True,
        )
        system_load = gr.HTML(label="System Load")

        def refresh_dashboard():
            """Poll brains and Spaces; return (brain rows, space rows, gauge HTML)."""
            brains = check_all_brains()
            spaces = check_all_spaces()

            brain_rows = []
            latencies = []
            for b in brains:
                online = b.get("online")
                icon = "🟢" if online else "🔴"
                # A missing or null latency counts as 0.
                lat = b.get("latency_ms") or 0
                latencies.append(lat)
                # One bar segment per 50 ms, clamped to 0..10; int() because a
                # float latency cannot be used to repeat a string.
                filled = int(max(0, min(lat // 50, 10)))
                load_bar = "█" * filled + "░" * (10 - filled)
                brain_rows.append([
                    BRAINS.get(b["brain_id"], {}).get("icon", "🧠"),
                    b.get("name", b["brain_id"]),
                    f"{icon} {b.get('status', 'unknown')}",
                    f"{lat}ms" if online else "—",
                    load_bar if online else "—",
                ])

            space_rows = []
            for s in spaces:
                online = s.get("online")
                icon = "🟢" if online else "🔴"
                space_rows.append([
                    s.get("name", s["id"]),
                    f"{icon} {s.get('status', 'unknown')}",
                    str(s.get("http_code", "—")),
                    f"{s.get('latency_ms', 0)}ms" if online else "—",
                    HF_SPACES.get(s["id"], {}).get("issue", ""),
                ])

            # System load gauge: mean brain latency mapped onto 0-100 %,
            # saturating at 500 ms.
            avg_lat = sum(latencies) / max(len(latencies), 1)
            load_pct = min(avg_lat / 500 * 100, 100)
            color = "#4ade80" if load_pct < 50 else "#fbbf24" if load_pct < 80 else "#ef4444"
            load_html = f"""
<div style="text-align:center; padding:12px;">
<div style="font-size:12px; letter-spacing:2px;">SYSTEM LOAD</div>
<div style="font-size:36px; font-weight:700; color:{color};">{load_pct:.0f}%</div>
<div style="font-size:12px;">Avg latency: {avg_lat:.0f}ms across {len(latencies)} brains</div>
</div>
"""
            return brain_rows, space_rows, load_html

        refresh_btn.click(refresh_dashboard, outputs=[brain_table, space_table, system_load])
        # Populate the tables once when the page first loads.
        demo.load(refresh_dashboard, outputs=[brain_table, space_table, system_load])
def build_cognitive_pipeline():
    """Build the Pipeline tab: run one query and visualise the modules it hit.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Tab("⚡ Pipeline"):
        gr.Markdown("## Cognitive Pipeline\nWatch how CAPT processes your query through 10 specialized modules.")
        pipeline_input = gr.Textbox(
            label="Enter a query to visualize",
            placeholder="e.g. 'What is consciousness?'",
            lines=2,
        )
        pipeline_btn = gr.Button("🔮 Run Pipeline", variant="primary")
        pipeline_viz = gr.HTML(label="Pipeline Visualization")
        pipeline_result = gr.Textbox(label="Response", lines=6, interactive=False)
        pipeline_meta = gr.JSON(label="Trace Metadata")

        def run_pipeline(query: str):
            """Route *query* to a brain; return (pipeline HTML, response text, metadata)."""
            if not (query or "").strip():
                return render_pipeline_html(), "Enter a query.", {}

            # Start with every module lit; replaced below once the trace is known.
            viz = render_pipeline_html([m[0] for m in PIPELINE_MODULES])

            # Route with fallback (the health record is not used here).
            brain_id, _health, notice = route_with_fallback(query)
            payload = {"query": query, "context": "", "model": "deepseek/deepseek-chat-v3-0324", "modality": "text"}
            result = call_brain(brain_id, "/cogitate", payload)
            if "error" in result:
                return viz, f"Error: {result['error']}", {}

            # `or` fallbacks: a key present with a null value must not crash
            # the formatting below.
            trace = result.get("pipelineTrace") or {}
            modules_hit = trace.get("modules") or []
            confidence = result.get("confidence") or 0

            # Re-render with only hit modules active
            viz = render_pipeline_html(modules_hit)
            meta = {
                "brain": BRAINS.get(brain_id, {}).get("name", brain_id),
                "confidence": f"{confidence*100:.1f}%",
                "latency_ms": trace.get("totalLatencyMs", 0),
                "modules_activated": modules_hit,
                "fallback_notice": notice,
            }
            return viz, result.get("response") or "", meta

        pipeline_btn.click(run_pipeline, inputs=[pipeline_input], outputs=[pipeline_viz, pipeline_result, pipeline_meta])
def build_brain_router():
    """Build the Router tab: auto, manual, or all-brains ("parliament") routing.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Tab("🧠 Router"):
        gr.Markdown("## Brain Router\nCAPT auto-routes your query to the best specialized brain — or uses all 5 as a parliament.")
        with gr.Row():
            with gr.Column(scale=2):
                router_query = gr.Textbox(label="Query", placeholder="Ask anything...", lines=3)
                router_context = gr.Textbox(label="Context (optional)", placeholder="Additional context", lines=2)
                with gr.Row():
                    router_mode = gr.Radio(
                        choices=[("Auto-Route", "auto"), ("Manual", "manual"), ("Parliament (All 5)", "council")],
                        value="auto",
                        label="Mode",
                    )
                    # Only shown in manual mode (see on_mode_change).
                    router_brain = gr.Dropdown(
                        choices=[(f"{b['icon']} {b['name']}", bid) for bid, b in BRAINS.items()],
                        value="capt-core-01",
                        label="Target Brain",
                        visible=False,
                    )
                router_model = gr.Dropdown(
                    choices=get_all_models(),
                    value="deepseek/deepseek-chat-v3-0324",
                    label="Model",
                    interactive=False,
                )
                gr.HTML('<span class="free-badge">🆓 FREE TIER ONLY</span>')
                router_btn = gr.Button("⚡ Execute", variant="primary")
            with gr.Column(scale=3):
                router_result = gr.Textbox(label="Response", lines=10, interactive=False)
                with gr.Row():
                    router_brain_used = gr.Textbox(label="Brain Used", interactive=False)
                    router_confidence = gr.Textbox(label="Confidence", interactive=False)
                    router_latency = gr.Textbox(label="Latency", interactive=False)
                router_explanation = gr.HTML(label="Routing Explanation")
                router_trace = gr.JSON(label="Pipeline Trace", visible=False)

        def on_mode_change(mode):
            """Show the brain picker only while manual mode is selected."""
            return gr.Dropdown(visible=(mode == "manual"))

        router_mode.change(on_mode_change, inputs=router_mode, outputs=router_brain)

        def do_route(query, context, mode, brain_id, model):
            """Execute a query in the selected mode.

            Returns a 6-tuple matching the click outputs: response text,
            brain label, confidence, latency, explanation HTML, trace dict.
            """
            if not (query or "").strip():
                return "Enter a query.", "", "", "", "", {}
            start = time.time()
            payload = {"query": query, "context": context, "model": model, "modality": "text"}

            if mode == "council":
                results = call_all_brains("/cogitate", payload)
                responses = []
                confidences = []
                for bid, result in results.items():
                    name = BRAINS.get(bid, {}).get("name", bid)
                    resp = result.get("response") or f"Error: {result.get('error', 'Unknown')}"
                    conf = result.get("confidence") or 0
                    if "error" not in result:
                        confidences.append(conf)
                    # Each opinion is capped at 500 chars for the synthesis prompt.
                    responses.append(f"### {name} (confidence: {conf:.2f})\n{resp[:500]}")
                summary_prompt = (
                    f"Synthesize these {len(responses)} expert opinions into one coherent response:\n\n"
                    + "\n\n---\n\n".join(responses)
                )
                summary = call_openrouter(
                    summary_prompt,
                    model,
                    "You are a synthesis engine. Combine multiple expert opinions into one clear answer.",
                    0.3,
                    2048,
                )
                # Show why synthesis failed; call_openrouter has already
                # sanitized its error text.
                synthesis = summary.get("content") or (
                    f"Synthesis failed: {summary.get('error', 'no content returned')}"
                )
                full_response = (
                    f"## 🏛️ Parliament Consensus\n\n{synthesis}\n\n---\n\n"
                    f"## Individual Responses\n\n" + "\n\n".join(responses)
                )
                elapsed = int((time.time() - start) * 1000)
                # The synthesis result carries no confidence of its own, so
                # report the mean over the brains that actually answered.
                if confidences:
                    council_conf = f"{sum(confidences) / len(confidences) * 100:.1f}%"
                else:
                    council_conf = "N/A"
                return (
                    full_response,
                    "LLM Parliament (All 5)",
                    council_conf,
                    f"{elapsed}ms",
                    "<div>All 5 brains deliberated and reached consensus</div>",
                    {},
                )
            elif mode == "auto":
                brain_id, matched = route_query(query)
                # Build explanation
                matched_kws = matched.get(brain_id, [])
                brain_name = BRAINS.get(brain_id, {}).get("name", brain_id)
                if matched_kws:
                    kw_str = ", ".join(f"'{k}'" for k in matched_kws)
                    explanation = f"<div>Routed to <b>{brain_name}</b> because your query matched: {kw_str}</div>"
                else:
                    explanation = f"<div>No strong keyword match — defaulted to <b>{brain_name}</b></div>"
            else:
                brain_name = BRAINS.get(brain_id, {}).get("name", brain_id)
                explanation = f"<div>Manually selected: <b>{brain_name}</b></div>"

            result = call_brain(brain_id, "/cogitate", payload)
            elapsed = int((time.time() - start) * 1000)
            if "error" in result:
                return f"Error: {result['error']}", brain_id, "", f"{elapsed}ms", explanation, {}
            brain_name = BRAINS.get(brain_id, {}).get("name", brain_id)
            trace = result.get("pipelineTrace") or {}
            confidence = result.get("confidence") or 0
            return (
                result.get("response") or "",
                f"{brain_name} ({brain_id})",
                f"{confidence*100:.1f}%",
                # Prefer the brain's own timing; fall back to wall-clock here.
                f"{trace.get('totalLatencyMs', elapsed)}ms",
                explanation,
                trace,
            )

        router_btn.click(
            do_route,
            inputs=[router_query, router_context, router_mode, router_brain, router_model],
            outputs=[router_result, router_brain_used, router_confidence, router_latency, router_explanation, router_trace],
        )
def build_pulse_proxy():
    """Build the PULSE tab: send a prompt straight to a model via ``call_openrouter``.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Tab("💬 PULSE"):
        gr.Markdown("## Direct LLM Access\nBypass the cognitive pipeline and query any free model directly.")
        with gr.Row():
            with gr.Column(scale=2):
                pulse_prompt = gr.Textbox(label="Prompt", placeholder="Enter prompt...", lines=4)
                pulse_system = gr.Textbox(label="System Prompt", placeholder="You are CAPT...", lines=2)
                gr.HTML("""
<span class="free-badge">🆓 FREE TIER ONLY</span>
<div>DeepSeek V3 · Gemini Flash · Llama 3.1 8B</div>
""")
                pulse_model = gr.Dropdown(
                    choices=get_all_models(),
                    value="deepseek/deepseek-chat-v3-0324",
                    label="Model",
                )
                with gr.Row():
                    pulse_temp = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
                    pulse_max = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens")
                pulse_btn = gr.Button("🔥 Generate", variant="primary")
            with gr.Column(scale=3):
                pulse_out = gr.Textbox(label="Output", lines=12, interactive=False)
                pulse_meta = gr.JSON(label="Metadata")

        def do_pulse(prompt, system, model, temp, maxtok):
            """Call the model once; return (output text, metadata dict)."""
            if not (prompt or "").strip():
                return "", {}
            # int(): make sure a whole-number token cap is forwarded whatever
            # numeric type the slider delivers.
            result = call_openrouter(prompt, model, system or "", temp, int(maxtok))
            if "error" in result:
                return f"Error: {result['error']}", {}
            in_tok = result.get("input_tokens") or 0
            out_tok = result.get("output_tokens") or 0
            meta = {
                "model": result.get("model"),
                "input_tokens": result.get("input_tokens"),
                "output_tokens": result.get("output_tokens"),
                "total_tokens": in_tok + out_tok,
                "cost": "$0.00 (free tier)",
            }
            return result.get("content", ""), meta

        pulse_btn.click(do_pulse, inputs=[pulse_prompt, pulse_system, pulse_model, pulse_temp, pulse_max], outputs=[pulse_out, pulse_meta])
def _sanitize_json(value):
    """Apply ``sanitize_error`` to every string inside a decoded JSON value."""
    if isinstance(value, str):
        return sanitize_error(value)
    if isinstance(value, dict):
        return {k: _sanitize_json(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_sanitize_json(v) for v in value]
    return value


def build_api_tester():
    """Build the API tab: send a GET/POST to a brain endpoint and show the reply.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Tab("🔌 API"):
        gr.Markdown("## Test Brain APIs\nDirect API proxy to all brain endpoints.")
        with gr.Row():
            with gr.Column(scale=2):
                api_brain = gr.Dropdown(
                    choices=[(f"{b['icon']} {b['name']}", bid) for bid, b in BRAINS.items()],
                    value="capt-core-01",
                    label="Brain",
                )
                api_endpoint = gr.Dropdown(
                    choices=["/health", "/cogitate", "/pulse", "/echo/store", "/echo/recall", "/status"],
                    value="/health",
                    label="Endpoint",
                )
                api_method = gr.Radio(choices=["GET", "POST"], value="GET", label="Method")
                api_payload = gr.Code(label="Payload (JSON)", language="json", value='{}')
                api_btn = gr.Button("📡 Send", variant="primary")
            with gr.Column(scale=3):
                api_response = gr.JSON(label="Response")
                api_raw = gr.Textbox(label="Raw", lines=6, interactive=False)

        def do_api(brain, endpoint, method, payload):
            """Proxy one request; return (sanitized JSON, sanitized raw text)."""
            try:
                body = json.loads(payload) if (payload or "").strip() else {}
            except json.JSONDecodeError as e:
                return {"error": f"Invalid JSON: {e}"}, ""
            worker = BRAINS.get(brain, {}).get("worker", "")
            if not worker:
                # Without a base URL the request target would be just the
                # endpoint path; fail with a clear message instead.
                return {"error": "Unknown brain or no worker URL configured"}, ""
            url = f"{worker}{endpoint}"
            try:
                if method == "GET":
                    r = requests.get(url, timeout=15)
                else:
                    r = requests.post(url, json=body, timeout=60, headers={"Content-Type": "application/json"})
                try:
                    data = r.json()
                except Exception:
                    data = {"status_code": r.status_code, "text": r.text[:500]}
                # Sanitize before display, including nested values and
                # list-shaped bodies.
                data = _sanitize_json(data)
                return data, sanitize_error(r.text[:1000])
            except Exception as e:
                return {"error": sanitize_error(e)}, ""

        api_btn.click(do_api, inputs=[api_brain, api_endpoint, api_method, api_payload], outputs=[api_response, api_raw])
# ───────────────────────────────────────────────────────────────────
# MAIN APP
# ───────────────────────────────────────────────────────────────────
# Dark theme: cyan/violet accents on zinc neutrals, Inter as the UI font.
_theme = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="violet",
    neutral_hue="zinc",
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
).set(
    body_background_fill="*neutral_950",
    body_text_color="*neutral_100",
    block_background_fill="*neutral_900",
    block_border_color="*neutral_800",
    input_background_fill="*neutral_950",
    button_primary_background_fill="*primary_600",
    button_primary_text_color="white",
)

# The tab builders register their components on whichever Blocks context is
# active, so they must be called inside this `with`. build_fleet_dashboard
# additionally reads the module-level name `demo` bound here.
with gr.Blocks(
    title="🔥 The Forge — CAPT Cognitive Fleet",
    css=FORGE_CSS,
    theme=_theme,
) as demo:
    gr.Markdown(
        "# 🔥 The Forge\n"
        "A public demo of multi-brain AI cognition — 5 specialized brains, one orchestrator. "
        '<span class="free-badge">FREE MODELS ONLY</span>'
    )
    build_fleet_dashboard()
    build_cognitive_pipeline()
    build_brain_router()
    build_pulse_proxy()
    build_api_tester()
if __name__ == "__main__":
    # Listen on every interface; the port comes from $PORT and falls back
    # to 7860 when the variable is unset.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.getenv("PORT", "7860")),
        "share": False,
        "show_error": True,
        "theme": _theme,
    }
    demo.launch(**launch_options)