""" ๐Ÿ”ฅ The Forge โ€” Public CAPT Fleet Controller A mind-blowing demo of real multi-brain AI cognition. SECURITY NOTES: - OPENROUTER_API_KEY must be set as HF Space secret only - All errors sanitized before reaching UI - Free models enforced server-side - No IP addresses or internal URLs exposed """ from __future__ import annotations import json import os import time import gradio as gr import requests from fleet_controller import ( BRAINS, HF_SPACES, ROUTING_KEYWORDS, MODEL_TIERS, TIER_LABELS, FREE_MODEL_IDS, FREE_MODEL_IDS as _FREE_MODEL_IDS, route_query, route_with_fallback, call_brain, call_all_brains, check_brain_health, check_all_brains, check_space_health, check_all_spaces, get_all_models, is_free_model, sanitize_error, ) OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY", "") # โ”€โ”€โ”€ Public-safe OpenRouter caller โ”€โ”€โ”€ def call_openrouter( prompt: str, model: str = "deepseek/deepseek-chat-v3-0324", system: str = "", temperature: float = 0.7, max_tokens: int = 2048, ) -> dict: if not OPENROUTER_KEY: return {"error": "OPENROUTER_API_KEY not configured โ€” contact admin"} # Server-side enforcement: free models only if not is_free_model(model): return {"error": f"Model '{model}' is not in the free tier. Only free models are available in this demo."} try: r = requests.post( "https://openrouter.ai/api/v1/chat/completions", headers={ "Authorization": f"Bearer {OPENROUTER_KEY}", "Content-Type": "application/json", "HTTP-Referer": "https://capt.dev", "X-Title": "CAPT-TheForge-Public", }, json={ "model": model, "messages": [ {"role": "system", "content": system or "You are CAPT Forge โ€” a public demo of multi-brain AI cognition."}, {"role": "user", "content": prompt}, ], "temperature": temperature, "max_tokens": max_tokens, }, timeout=60, ) data = r.json() if r.status_code != 200: err_msg = data.get("error", {}).get("message", r.text[:200]) return {"error": sanitize_error(err_msg)} return { "content": data["choices"][0]["message"]["content"], "model": data.get("model", model), "input_tokens": data.get("usage", {}).get("prompt_tokens", 0), "output_tokens": data.get("usage", {}).get("completion_tokens", 0), } except requests.exceptions.Timeout: return {"error": "OpenRouter timed out โ€” try again in a moment"} except Exception as e: return {"error": sanitize_error(e)} # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• # CUSTOM CSS โ€” Dark cinematic theme with glow effects # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• FORGE_CSS = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap'); :root { --forge-bg: #0a0a0f; --forge-panel: #111118; --forge-border: #1e1e2e; --forge-cyan: #00d4ff; --forge-purple: #a855f7; --forge-green: #4ade80; --forge-gold: #fbbf24; --forge-red: #ef4444; } body { font-family: 'Inter', system-ui, sans-serif !important; background: var(--forge-bg) !important; } /* Glowing borders on active elements */ .gr-button-primary { background: linear-gradient(135deg, #00d4ff 0%, #a855f7 100%) !important; border: none !important; box-shadow: 0 0 20px rgba(0, 212, 255, 0.3) !important; transition: all 0.3s ease !important; } .gr-button-primary:hover { box-shadow: 0 0 30px rgba(0, 212, 255, 0.5) !important; transform: translateY(-1px) !important; } /* Pipeline module styles */ .pipeline-module { display: inline-block; padding: 6px 14px; margin: 3px; border-radius: 20px; font-size: 12px; font-weight: 500; font-family: 'JetBrains Mono', monospace; border: 1px solid; transition: all 0.5s ease; } .pipeline-inactive { opacity: 0.3; background: transparent; color: #666; border-color: #333; } .pipeline-active { opacity: 1; box-shadow: 0 0 15px currentColor; } .pipeline-pulse { color: #a855f7; background: rgba(168,85,247,0.15); border-color: #a855f7; } .pipeline-neda { color: #3b82f6; background: rgba(59,130,246,0.15); border-color: #3b82f6; } .pipeline-hmc { color: #06b6d4; background: rgba(6,182,212,0.15); border-color: #06b6d4; } .pipeline-cig { color: #f59e0b; background: rgba(245,158,11,0.15); border-color: #f59e0b; } .pipeline-hdr { color: #ec4899; background: rgba(236,72,153,0.15); border-color: #ec4899; } .pipeline-qipc { color: #fbbf24; background: rgba(251,191,36,0.15); border-color: #fbbf24; } .pipeline-echo { color: #4ade80; background: rgba(74,222,128,0.15); border-color: #4ade80; } .pipeline-meta { color: #94a3b8; background: rgba(148,163,184,0.15); border-color: #94a3b8; } .pipeline-immu { color: #22c55e; background: rgba(34,197,94,0.15); border-color: #22c55e; } .pipeline-nds { color: #00d4ff; background: rgba(0,212,255,0.15); border-color: #00d4ff; } /* Status indicators */ .status-pulse { display: inline-block; width: 8px; height: 8px; border-radius: 50%; animation: pulse-glow 2s infinite; } .status-online { background: #4ade80; box-shadow: 0 0 8px #4ade80; } .status-offline { background: #ef4444; box-shadow: 0 0 8px #ef4444; } @keyframes pulse-glow { 0%, 100% { opacity: 1; } 50% { opacity: 0.5; } } /* Monospace JSON blocks */ .json-output { font-family: 'JetBrains Mono', monospace !important; font-size: 13px !important; } /* Free badge */ .free-badge { display: inline-block; padding: 2px 8px; background: rgba(74,222,128,0.2); color: #4ade80; border: 1px solid rgba(74,222,128,0.4); border-radius: 12px; font-size: 11px; font-weight: 600; margin-left: 8px; } """ # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• # PIPELINE VISUALIZATION # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• PIPELINE_MODULES = [ ("PULSE", "Language Processing", "pipeline-pulse"), ("NEDA", "Pattern Recognition", "pipeline-neda"), ("HMC", "Memory Encoding", "pipeline-hmc"), ("CIG", "Causal Inference", "pipeline-cig"), ("HDR", "Hyperdimensional Reasoning", "pipeline-hdr"), ("QIPC", "Quantum Consensus", "pipeline-qipc"), ("ECHO", "Episodic Memory", "pipeline-echo"), ("META", "Self-Monitoring", "pipeline-meta"), ("IMMU", "Constitutional Guard", "pipeline-immu"), ("NDS", "Decision Synthesis", "pipeline-nds"), ] def render_pipeline_html(active_modules: list[str] | None = None) -> str: """Render the cognitive pipeline as HTML with active modules glowing.""" active = set(active_modules or []) arrows = " โ†’ " parts = [] for mod, desc, cls in PIPELINE_MODULES: state = "pipeline-active" if mod in active else "pipeline-inactive" parts.append(f'{mod}') return f"""
{arrows.join(parts)}
""" # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• # TAB BUILDERS # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• def build_fleet_dashboard(): with gr.Tab("๐ŸŽ›๏ธ Fleet"): gr.Markdown("## CAPT Fleet Status\nReal-time health of the cognitive architecture.") with gr.Row(): refresh_btn = gr.Button("๐Ÿ”„ Refresh", variant="primary") auto_refresh = gr.Checkbox(label="Auto-refresh (30s)", value=False) gr.Markdown("### ๐Ÿง  Brain Health") brain_table = gr.Dataframe( headers=["", "Brain", "Status", "Latency", "Load"], datatype=["str", "str", "str", "str", "str"], interactive=False, wrap=True, ) gr.Markdown("### ๐Ÿš€ Space Fleet") space_table = gr.Dataframe( headers=["Space", "Status", "HTTP", "Latency", "Note"], datatype=["str", "str", "str", "str", "str"], interactive=False, wrap=True, ) system_load = gr.HTML(label="System Load") def refresh_dashboard(): brains = check_all_brains() spaces = check_all_spaces() brain_rows = [] latencies = [] for b in brains: icon = "๐ŸŸข" if b.get("online") else "๐Ÿ”ด" lat = b.get("latency_ms", 0) latencies.append(lat) load_bar = "โ–ˆ" * min(lat // 50, 10) + "โ–‘" * (10 - min(lat // 50, 10)) brain_rows.append([ BRAINS.get(b["brain_id"], {}).get("icon", "๐Ÿง "), b.get("name", b["brain_id"]), f"{icon} {b.get('status', 'unknown')}", f"{lat}ms" if b.get("online") else "โ€”", load_bar if b.get("online") else "โ€”", ]) space_rows = [] for s in spaces: icon = "๐ŸŸข" if s.get("online") else "๐Ÿ”ด" space_rows.append([ s.get("name", s["id"]), f"{icon} {s.get('status', 'unknown')}", str(s.get("http_code", "โ€”")), f"{s.get('latency_ms', 0)}ms" if s.get("online") else "โ€”", HF_SPACES.get(s["id"], {}).get("issue", ""), ]) # System load gauge avg_lat = sum(latencies) / max(len(latencies), 1) load_pct = min(avg_lat / 500 * 100, 100) color = "#4ade80" if load_pct < 50 else "#fbbf24" if load_pct < 80 else "#ef4444" load_html = f"""
SYSTEM LOAD {load_pct:.0f}%
Avg latency: {avg_lat:.0f}ms across {len(latencies)} brains
""" return brain_rows, space_rows, load_html refresh_btn.click(refresh_dashboard, outputs=[brain_table, space_table, system_load]) demo.load(refresh_dashboard, outputs=[brain_table, space_table, system_load]) def build_cognitive_pipeline(): with gr.Tab("โšก Pipeline"): gr.Markdown("## Cognitive Pipeline\nWatch how CAPT processes your query through 10 specialized modules.") pipeline_input = gr.Textbox( label="Enter a query to visualize", placeholder="e.g. 'What is consciousness?'", lines=2, ) pipeline_btn = gr.Button("๐Ÿ”ฎ Run Pipeline", variant="primary") pipeline_viz = gr.HTML(label="Pipeline Visualization") pipeline_result = gr.Textbox(label="Response", lines=6, interactive=False) pipeline_meta = gr.JSON(label="Trace Metadata") def run_pipeline(query: str): if not query.strip(): return render_pipeline_html(), "Enter a query.", {} # Show all modules activating in sequence viz = render_pipeline_html([m[0] for m in PIPELINE_MODULES]) # Route with fallback brain_id, health, notice = route_with_fallback(query) payload = {"query": query, "context": "", "model": "deepseek/deepseek-chat-v3-0324", "modality": "text"} result = call_brain(brain_id, "/cogitate", payload) if "error" in result: return viz, f"Error: {result['error']}", {} trace = result.get("pipelineTrace", {}) modules_hit = trace.get("modules", []) confidence = result.get("confidence", 0) # Re-render with only hit modules active viz = render_pipeline_html(modules_hit) meta = { "brain": BRAINS.get(brain_id, {}).get("name", brain_id), "confidence": f"{confidence*100:.1f}%", "latency_ms": trace.get("totalLatencyMs", 0), "modules_activated": modules_hit, "fallback_notice": notice, } return viz, result.get("response", ""), meta pipeline_btn.click(run_pipeline, inputs=[pipeline_input], outputs=[pipeline_viz, pipeline_result, pipeline_meta]) def build_brain_router(): with gr.Tab("๐Ÿง  Router"): gr.Markdown("## Brain Router\nCAPT auto-routes your query to the best specialized brain โ€” or uses all 5 as a parliament.") with gr.Row(): with gr.Column(scale=2): router_query = gr.Textbox(label="Query", placeholder="Ask anything...", lines=3) router_context = gr.Textbox(label="Context (optional)", placeholder="Additional context", lines=2) with gr.Row(): router_mode = gr.Radio( choices=[("Auto-Route", "auto"), ("Manual", "manual"), ("Parliament (All 5)", "council")], value="auto", label="Mode" ) router_brain = gr.Dropdown( choices=[(f"{b['icon']} {b['name']}", bid) for bid, b in BRAINS.items()], value="capt-core-01", label="Target Brain", visible=False, ) router_model = gr.Dropdown( choices=get_all_models(), value="deepseek/deepseek-chat-v3-0324", label="Model", interactive=False, ) gr.HTML('
๐Ÿ’š FREE TIER ONLY
') router_btn = gr.Button("โšก Execute", variant="primary") with gr.Column(scale=3): router_result = gr.Textbox(label="Response", lines=10, interactive=False) with gr.Row(): router_brain_used = gr.Textbox(label="Brain Used", interactive=False) router_confidence = gr.Textbox(label="Confidence", interactive=False) router_latency = gr.Textbox(label="Latency", interactive=False) router_explanation = gr.HTML(label="Routing Explanation") router_trace = gr.JSON(label="Pipeline Trace", visible=False) def on_mode_change(mode): return gr.Dropdown(visible=(mode == "manual")) router_mode.change(on_mode_change, inputs=router_mode, outputs=router_brain) def do_route(query, context, mode, brain_id, model): if not query.strip(): return "Enter a query.", "", "", "", "", {} start = time.time() payload = {"query": query, "context": context, "model": model, "modality": "text"} notice = "" if mode == "council": results = call_all_brains("/cogitate", payload) responses = [] for bid, result in results.items(): name = BRAINS.get(bid, {}).get("name", bid) resp = result.get("response", f"Error: {result.get('error', 'Unknown')}") conf = result.get("confidence", 0) responses.append(f"### {name} (confidence: {conf:.2f})\n{resp[:500]}") summary_prompt = ( f"Synthesize these {len(responses)} expert opinions into one coherent response:\n\n" + "\n\n---\n\n".join(responses) ) summary = call_openrouter( summary_prompt, model, "You are a synthesis engine. Combine multiple expert opinions into one clear answer.", 0.3, 2048, ) full_response = ( f"## ๐Ÿ›๏ธ Parliament Consensus\n\n{summary.get('content', 'Synthesis failed')}\n\n---\n\n" f"## Individual Responses\n\n" + "\n\n".join(responses) ) elapsed = int((time.time() - start) * 1000) return ( full_response, "LLM Parliament (All 5)", f"{summary.get('confidence', 0):.2f}" if isinstance(summary, dict) else "N/A", f"{elapsed}ms", "
All 5 brains deliberated and reached consensus
", {}, ) elif mode == "auto": brain_id, matched = route_query(query) # Build explanation matched_kws = matched.get(brain_id, []) brain_name = BRAINS.get(brain_id, {}).get("name", brain_id) if matched_kws: kw_str = ", ".join(f"'{k}'" for k in matched_kws) explanation = f"
Routed to {brain_name} because your query matched: {kw_str}
" else: explanation = f"
No strong keyword match โ€” defaulted to {brain_name}
" else: brain_name = BRAINS.get(brain_id, {}).get("name", brain_id) explanation = f"
Manually selected: {brain_name}
" result = call_brain(brain_id, "/cogitate", payload) elapsed = int((time.time() - start) * 1000) if "error" in result: return f"Error: {result['error']}", brain_id, "", f"{elapsed}ms", explanation, {} brain_name = BRAINS.get(brain_id, {}).get("name", brain_id) trace = result.get("pipelineTrace", {}) return ( result.get("response", ""), f"{brain_name} ({brain_id})", f"{result.get('confidence', 0)*100:.1f}%", f"{trace.get('totalLatencyMs', elapsed)}ms", explanation, trace, ) router_btn.click( do_route, inputs=[router_query, router_context, router_mode, router_brain, router_model], outputs=[router_result, router_brain_used, router_confidence, router_latency, router_explanation, router_trace], ) def build_pulse_proxy(): with gr.Tab("๐Ÿ’ฌ PULSE"): gr.Markdown("## Direct LLM Access\nBypass the cognitive pipeline and query any free model directly.") with gr.Row(): with gr.Column(scale=2): pulse_prompt = gr.Textbox(label="Prompt", placeholder="Enter prompt...", lines=4) pulse_system = gr.Textbox(label="System Prompt", placeholder="You are CAPT...", lines=2) gr.HTML("""
๐Ÿ’š FREE TIER ONLY DeepSeek V3 ยท Gemini Flash ยท Llama 3.1 8B
""") pulse_model = gr.Dropdown( choices=get_all_models(), value="deepseek/deepseek-chat-v3-0324", label="Model", ) with gr.Row(): pulse_temp = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature") pulse_max = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens") pulse_btn = gr.Button("๐Ÿ”ฅ Generate", variant="primary") with gr.Column(scale=3): pulse_out = gr.Textbox(label="Output", lines=12, interactive=False) pulse_meta = gr.JSON(label="Metadata") def do_pulse(prompt, system, model, temp, maxtok): if not prompt.strip(): return "", {} result = call_openrouter(prompt, model, system, temp, maxtok) if "error" in result: return f"Error: {result['error']}", {} meta = { "model": result.get("model"), "input_tokens": result.get("input_tokens"), "output_tokens": result.get("output_tokens"), "total_tokens": result.get("input_tokens", 0) + result.get("output_tokens", 0), "cost": "$0.00 (free tier)", } return result.get("content", ""), meta pulse_btn.click(do_pulse, inputs=[pulse_prompt, pulse_system, pulse_model, pulse_temp, pulse_max], outputs=[pulse_out, pulse_meta]) def build_api_tester(): with gr.Tab("๐Ÿ”Œ API"): gr.Markdown("## Test Brain APIs\nDirect API proxy to all brain endpoints.") with gr.Row(): with gr.Column(scale=2): api_brain = gr.Dropdown( choices=[(f"{b['icon']} {b['name']}", bid) for bid, b in BRAINS.items()], value="capt-core-01", label="Brain", ) api_endpoint = gr.Dropdown( choices=["/health", "/cogitate", "/pulse", "/echo/store", "/echo/recall", "/status"], value="/health", label="Endpoint", ) api_method = gr.Radio(choices=["GET", "POST"], value="GET", label="Method") api_payload = gr.Code(label="Payload (JSON)", language="json", value='{}') api_btn = gr.Button("๐Ÿ“ก Send", variant="primary") with gr.Column(scale=3): api_response = gr.JSON(label="Response") api_raw = gr.Textbox(label="Raw", lines=6, interactive=False) def do_api(brain, endpoint, method, payload): try: body = json.loads(payload) if payload.strip() else {} except json.JSONDecodeError as e: return {"error": f"Invalid JSON: {e}"}, "" worker = BRAINS.get(brain, {}).get("worker", "") url = f"{worker}{endpoint}" try: if method == "GET": r = requests.get(url, timeout=15) else: r = requests.post(url, json=body, timeout=60, headers={"Content-Type": "application/json"}) try: data = r.json() except Exception: data = {"status_code": r.status_code, "text": r.text[:500]} # Sanitize before display if isinstance(data, dict): data = {k: sanitize_error(v) if isinstance(v, str) else v for k, v in data.items()} return data, sanitize_error(r.text[:1000]) except Exception as e: return {"error": sanitize_error(e)}, "" api_btn.click(do_api, inputs=[api_brain, api_endpoint, api_method, api_payload], outputs=[api_response, api_raw]) # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• # MAIN APP # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• _theme = gr.themes.Base( primary_hue="cyan", secondary_hue="violet", neutral_hue="zinc", font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], ).set( body_background_fill="*neutral_950", body_text_color="*neutral_100", block_background_fill="*neutral_900", block_border_color="*neutral_800", input_background_fill="*neutral_950", button_primary_background_fill="*primary_600", button_primary_text_color="white", ) with gr.Blocks( title="๐Ÿ”ฅ The Forge โ€” CAPT Cognitive Fleet", css=FORGE_CSS, theme=_theme, ) as demo: gr.Markdown( "# ๐Ÿ”ฅ The Forge\n" "

" "A public demo of multi-brain AI cognition โ€” 5 specialized brains, one orchestrator. " "FREE MODELS ONLY" "

" ) build_fleet_dashboard() build_cognitive_pipeline() build_brain_router() build_pulse_proxy() build_api_tester() if __name__ == "__main__": demo.launch( server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), share=False, show_error=True, theme=_theme, )