<!--
  Repository page header captured together with the file (not part of the document):
  Layers / layers.html
  Chris4K's picture
  Update layers.html
  2d4d3ec verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI System Architecture — Reference Blueprint</title>
<meta name="description" content="Interactive reference blueprint of an AI system architecture: core layers, cross-cutting concerns and request traces.">
<!-- Warm up the Google Fonts connections early: the CSS comes from fonts.googleapis.com,
     the font files it references come from fonts.gstatic.com (fetched in CORS mode, hence crossorigin). -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<!-- Font Awesome 6.5.1 provides the "fas fa-*" icon classes used throughout the page.
     NOTE(review): consider adding integrity + crossorigin with the hash published by cdnjs. -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
<!-- IBM Plex Mono and Outfit back the --mono and --sans font variables. -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:ital,wght@0,300;0,400;0,500;0,600;0,700;1,400&family=Outfit:wght@300;400;500;600;700&display=swap">
<style>
:root {
  /* Surfaces, darkest to lightest */
  --bg:#05080f;
  --bg1:#0a0f1a;
  --bg2:#101728;
  --bg3:#182038;

  /* Border strengths */
  --border:#1e2a45;
  --border2:#2a3a5c;
  --border3:#3a4a6b;

  /* Text, brightest to dimmest */
  --white:#e8ecf4;
  --text:#c8d0e0;
  --text2:#6a7a9b;
  --text3:#3a4a6b;

  /* Accents and base palette */
  --accent:#4a90ff;
  --accent2:#7c5cfc;
  --green:#22c982;
  --amber:#f5a623;
  --rose:#f06292;
  --cyan:#00d4e6;
  --red:#ef5350;
  --orange:#ff8a50;
  --lime:#84cc16;
  --sky:#38bdf8;

  /* One colour per architecture section; the markup and data reference these via var(--c-*) */
  --c-present:#ef5350;
  --c-orch:#22c982;
  --c-llm:#7c5cfc;
  --c-data:#f5a623;
  --c-infra:#38bdf8;
  --c-sec:#f06292;
  --c-obs:#9c7cfc;
  --c-conf:#6a7a9b;
  --c-mem:#00d4e6;
  --c-life:#84cc16;
  --c-dev:#ff8a50;

  /* Font stacks (families loaded from Google Fonts in <head>) */
  --mono:'IBM Plex Mono',monospace;
  --sans:'Outfit',sans-serif;
}
/* Global reset: no default margins or padding, border-box sizing for every element and pseudo-element. */
*,*::before,*::after{margin:0;padding:0;box-sizing:border-box}
/* 14px root size: every rem value in this sheet is relative to it. */
html{font-size:14px}
/* Page shell: sans font, darkest surface, at least one viewport tall, no horizontal scrolling. */
body{font-family:var(--sans);background:var(--bg);color:var(--text);min-height:100vh;overflow-x:hidden}
/* Grain overlay: an inline SVG fractal-noise texture fixed over the whole viewport at z-index 9999;
   pointer-events:none keeps it click-through. */
body::after{content:'';position:fixed;inset:0;background:url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.8' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.025'/%3E%3C/svg%3E");pointer-events:none;z-index:9999}
/* Blueprint grid: two crossed 1px line gradients tiled every 60px, shown at 10% opacity and click-through. */
body::before{content:'';position:fixed;inset:0;background-image:linear-gradient(var(--border) 1px,transparent 1px),linear-gradient(90deg,var(--border) 1px,transparent 1px);background-size:60px 60px;opacity:0.1;pointer-events:none}
/* Text selection uses the accent colour with dark text. */
::selection{background:var(--accent);color:var(--bg)}
/* ═══ HEADER ═══ */
/* Title row: logo mark, heading, and right-aligned keyboard hints; wraps on narrow screens. */
.hdr {
  padding: 20px 28px 0;
  display: flex;
  align-items: center;
  gap: 16px;
  flex-wrap: wrap;
  position: relative;
  z-index: 10;
}

/* Square outlined logo mark */
.hdr-mark {
  width: 32px;
  height: 32px;
  border: 2px solid var(--accent);
  border-radius: 7px;
  display: flex;
  align-items: center;
  justify-content: center;
  font-family: var(--mono);
  font-weight: 700;
  font-size: 0.7rem;
  color: var(--accent);
  position: relative;
  overflow: hidden;
  flex-shrink: 0;
}

/* Faint accent wash filling the mark */
.hdr-mark::after {
  content: '';
  position: absolute;
  inset: 0;
  background: var(--accent);
  opacity: 0.06;
}

.hdr h1 {
  font-family: var(--mono);
  font-size: 1.05rem;
  font-weight: 600;
  color: var(--white);
  letter-spacing: -0.02em;
}

/* <b> inside the heading is used for accent-coloured separators */
.hdr h1 b {
  color: var(--accent);
}

/* Keyboard-hint cluster, pushed to the right edge by the auto margin */
.hdr-r {
  margin-left: auto;
  font-family: var(--mono);
  font-size: 0.68rem;
  color: var(--text3);
  display: flex;
  gap: 12px;
  align-items: center;
}

/* Key caps */
.hdr-r kbd {
  display: inline-block;
  padding: 1px 5px;
  border: 1px solid var(--border2);
  border-radius: 3px;
  font-size: 0.65rem;
  background: var(--bg2);
  color: var(--text3);
  margin: 0 1px;
}
/* ═══ TOOLBAR ═══ */
/* Row of trace buttons under the header */
.toolbar {
  padding: 10px 28px;
  display: flex;
  gap: 6px;
  align-items: center;
  flex-wrap: wrap;
  position: relative;
  z-index: 10;
}

/* Toolbar button: icon + label on one line */
.tb {
  font-family: var(--mono);
  font-size: 0.67rem;
  padding: 5px 12px;
  border-radius: 5px;
  border: 1px solid var(--border2);
  background: var(--bg2);
  color: var(--text2);
  cursor: pointer;
  transition: all 0.2s;
  display: flex;
  align-items: center;
  gap: 5px;
  white-space: nowrap;
}

.tb:hover {
  border-color: var(--accent);
  color: var(--white);
}

/* "on" state: accent outline with a faint accent fill */
.tb.on {
  border-color: var(--accent);
  color: var(--accent);
  background: rgba(74,144,255,0.06);
}

.tb i {
  font-size: 0.62rem;
}

/* Thin vertical divider between button groups */
.tb-sep {
  width: 1px;
  height: 18px;
  background: var(--border);
  margin: 0 2px;
}
/* ═══ MAIN LAYOUT ═══ */
/* Two columns: flexible visualisation + fixed 400px detail panel */
.main {
  display: grid;
  grid-template-columns: 1fr 400px;
  gap: 16px;
  padding: 0 28px 32px;
  position: relative;
  z-index: 10;
}

/* At 1080px and below: collapse to one column and tighten the side gutters */
@media (max-width: 1080px) {
  .main {
    grid-template-columns: 1fr;
  }
  .hdr,
  .toolbar,
  .main {
    padding-left: 12px;
    padding-right: 12px;
  }
}
/* ═══ VIZ PANEL ═══ */
/* Card that holds the architecture diagram */
.viz {
  background: var(--bg1);
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 16px;
  position: relative;
  overflow: hidden;
}

/* Soft accent glow anchored off the top-left corner; clipped by the card's overflow */
.viz::before {
  content: '';
  position: absolute;
  width: 500px;
  height: 500px;
  background: radial-gradient(circle,rgba(74,144,255,0.03),transparent 70%);
  top: -120px;
  left: -120px;
  pointer-events: none;
}
/* ═══ ARCHITECTURE GRID ═══ */
/* Left column sizes to the cross-cutting bars; right column takes the rest */
.arch {
  display: grid;
  grid-template-columns: auto 1fr;
  gap: 0;
  position: relative;
}

/* cross-cutting sidebar */
.xcut {
  display: flex;
  gap: 4px;
  padding-right: 10px;
}

/* One vertical bar per cross-cutting concern; its colour arrives through the inline --xc variable */
.xbar {
  width: 36px;
  border-radius: 6px;
  border: 1px solid var(--border);
  cursor: pointer;
  transition: all 0.25s;
  position: relative;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  gap: 6px;
  padding: 8px 0;
}

.xbar:hover {
  border-color: var(--border2);
  background: rgba(255,255,255,0.015);
}

/* Selected bar: tinted border and glow derived from --xc */
.xbar.sel {
  border-color: color-mix(in srgb, var(--xc) 50%, transparent);
  box-shadow: 0 0 16px color-mix(in srgb, var(--xc) 12%, transparent);
}

.xbar .xdot {
  width: 6px;
  height: 6px;
  border-radius: 50%;
  background: var(--xc);
}

.xbar .xicon {
  font-size: 0.7rem;
  color: var(--xc);
  opacity: 0.7;
}

/* Vertical label: vertical-rl writing mode, then rotated 180deg */
.xbar .xlabel {
  writing-mode: vertical-rl;
  text-orientation: mixed;
  font-family: var(--mono);
  font-size: 0.55rem;
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.12em;
  color: var(--text3);
  transform: rotate(180deg);
}
/* core stack: vertical list of layer rows separated by thin connectors */
.stack{display:flex;flex-direction:column;gap:4px}
/* Layer row: 30px icon | name | tags. The row colour comes from the inline --lc variable set on each
   .lyr element in the markup; the background is a 5% tint of it over --bg2. */
.lyr{display:grid;grid-template-columns:30px 1fr auto;align-items:center;gap:10px;padding:11px 14px;border-radius:7px;cursor:pointer;border:1px solid transparent;transition:all 0.22s;position:relative;background:color-mix(in srgb, var(--lc) 5%, var(--bg2))}
/* 3px colour stripe on the left edge, invisible until hover/selection */
.lyr::before{content:'';position:absolute;left:0;top:0;bottom:0;width:3px;border-radius:3px 0 0 3px;background:var(--lc);opacity:0;transition:opacity 0.2s}
/* Hover: stronger tint and a 3px nudge to the right */
.lyr:hover{background:color-mix(in srgb, var(--lc) 8%, var(--bg2));transform:translateX(3px)}
.lyr:hover::before{opacity:0.5}
/* Selected: declared after :hover with equal specificity, so it wins when both apply */
.lyr.sel{background:color-mix(in srgb, var(--lc) 12%, var(--bg2));border-color:color-mix(in srgb, var(--lc) 30%, transparent);transform:translateX(5px);box-shadow:0 0 20px color-mix(in srgb, var(--lc) 10%, transparent)}
.lyr.sel::before{opacity:1}
/* Flash pulse: this animation shorthand takes over the row's animation while .flash is present.
   NOTE(review): the .flash class is presumably toggled by the trace script, which is not visible here. */
.lyr.flash{animation:flashLyr 0.5s ease-out}
/* Glow grows to 28px at the midpoint, then settles on the same 20px glow used by .lyr.sel */
@keyframes flashLyr{0%{box-shadow:0 0 0 0 color-mix(in srgb, var(--lc) 50%, transparent)}50%{box-shadow:0 0 28px 4px color-mix(in srgb, var(--lc) 35%, transparent)}100%{box-shadow:0 0 20px color-mix(in srgb, var(--lc) 10%, transparent)}}
/* Icon tile */
.lyr .ico{width:30px;height:30px;border-radius:6px;display:flex;align-items:center;justify-content:center;font-size:12px;background:color-mix(in srgb, var(--lc) 15%, var(--bg3));color:var(--lc)}
/* Layer name with an optional dimmer <small> subtitle */
.lyr .nm{font-weight:600;font-size:0.85rem}
.lyr .nm small{font-weight:400;color:var(--text2);font-size:0.72rem;margin-left:6px}
/* Technology tags, right-aligned and wrapping */
.lyr .tgs{display:flex;gap:3px;flex-wrap:wrap;justify-content:flex-end}
.lyr .tgs span{font-family:var(--mono);font-size:0.54rem;padding:2px 5px;border-radius:3px;background:rgba(255,255,255,0.04);color:var(--text3);letter-spacing:0.02em}
/* Connector between two layer rows: a centred 1px vertical line, 6px tall */
.fcon{display:flex;justify-content:center;height:6px;position:relative}
.fcon::before{content:'';width:1px;height:100%;background:var(--border2)}
/* ═══ LIFECYCLE LOOP ═══ */
/* Clickable strip below the stack: icon, label, then the step chips */
.lifecycle {
  margin-top: 10px;
  border: 1px solid var(--border);
  border-radius: 7px;
  padding: 10px 14px;
  display: flex;
  align-items: center;
  gap: 10px;
  cursor: pointer;
  transition: all 0.22s;
}

.lifecycle:hover {
  border-color: var(--border2);
  background: rgba(132,204,22,0.03);
}

/* Selected: border and glow tinted with the lifecycle colour */
.lifecycle.sel {
  border-color: color-mix(in srgb, var(--c-life) 40%, transparent);
  box-shadow: 0 0 16px color-mix(in srgb, var(--c-life) 10%, transparent);
}

.lifecycle .lf-icon {
  color: var(--c-life);
  font-size: 0.75rem;
}

.lifecycle .lf-label {
  font-family: var(--mono);
  font-size: 0.7rem;
  font-weight: 600;
  color: var(--c-life);
}

/* Step list: small bordered chips that wrap */
.lifecycle .lf-steps {
  font-family: var(--mono);
  font-size: 0.58rem;
  color: var(--text3);
  display: flex;
  gap: 4px;
  flex-wrap: wrap;
  align-items: center;
}

.lifecycle .lf-steps span {
  padding: 2px 6px;
  border-radius: 3px;
  border: 1px solid var(--border);
  background: var(--bg2);
}

/* Connector spans between chips drop the chip box styling */
.lifecycle .lf-steps .arr {
  border: none;
  background: none;
  color: var(--text3);
  padding: 0;
}
/* ═══ DEV PRACTICES ═══ */
/* Dashed, dimmed strip at the bottom of the stack */
.devprac {
  margin-top: 6px;
  border: 1px dashed var(--border2);
  border-radius: 7px;
  padding: 10px 14px;
  display: flex;
  align-items: center;
  gap: 10px;
  cursor: pointer;
  transition: all 0.22s;
  opacity: 0.7;
}

.devprac:hover {
  opacity: 1;
  border-color: var(--c-dev);
}

.devprac.sel {
  opacity: 1;
  border-color: var(--c-dev);
  box-shadow: 0 0 12px color-mix(in srgb, var(--c-dev) 8%, transparent);
}

.devprac .dp-icon {
  color: var(--c-dev);
  font-size: 0.7rem;
}

.devprac .dp-label {
  font-family: var(--mono);
  font-size: 0.68rem;
  font-weight: 600;
  color: var(--c-dev);
}

.devprac .dp-sub {
  font-family: var(--mono);
  font-size: 0.56rem;
  color: var(--text3);
}
/* ═══ DETAIL PANEL ═══ */
/* Right-hand card: sticks 12px from the top, capped at viewport height minus 100px;
   lays out as a column so the tab strip stays fixed while the body scrolls. */
.det{background:var(--bg1);border:1px solid var(--border);border-radius:12px;overflow:hidden;display:flex;flex-direction:column;max-height:calc(100vh - 100px);position:sticky;top:12px}
/* Tab strip */
.det-tabs{display:flex;border-bottom:1px solid var(--border);background:var(--bg2);flex-shrink:0}
/* One tab; equal widths via flex:1. The 2px transparent bottom border reserves room for the active underline. */
.dt{flex:1;padding:9px 6px;text-align:center;font-family:var(--mono);font-size:0.63rem;font-weight:500;color:var(--text3);cursor:pointer;border-bottom:2px solid transparent;transition:all 0.2s;text-transform:uppercase;letter-spacing:0.06em}
.dt:hover{color:var(--text2)}
/* Active tab */
.dt.act{color:var(--accent);border-bottom-color:var(--accent)}
/* Scrollable content area with a slim WebKit scrollbar */
.det-body{padding:20px;overflow-y:auto;flex:1}
.det-body::-webkit-scrollbar{width:5px}
.det-body::-webkit-scrollbar-track{background:transparent}
.det-body::-webkit-scrollbar-thumb{background:var(--border2);border-radius:3px}
/* Content building blocks (d-*). NOTE(review): presumably emitted by the script when a section is
   selected; the rendering code is not visible in this part of the file. */
.d-title{font-family:var(--mono);font-size:1rem;font-weight:700;color:var(--white);margin-bottom:3px}
/* Short bar under the title; no background is set here, so its colour must be supplied elsewhere */
.d-bar{width:32px;height:3px;border-radius:2px;margin-bottom:16px}
/* Small uppercase section heading */
.d-h{font-family:var(--mono);font-size:0.64rem;font-weight:600;text-transform:uppercase;letter-spacing:0.1em;color:var(--text3);margin:18px 0 6px}
/* NOTE(review): :first-of-type matches on tag name, not class; confirm the first .d-h really is the
   first element of its tag type inside its parent, otherwise this reset never applies. */
.d-h:first-of-type{margin-top:0}
.d-p{font-size:0.82rem;line-height:1.6;color:var(--text)}
/* Bullet list with a custom marker drawn by ::before */
.d-ul{list-style:none;padding:0}
.d-ul li{padding:4px 0;font-size:0.8rem;color:var(--text);display:flex;gap:7px;line-height:1.45}
.d-ul li::before{content:'▸';color:var(--text3);flex-shrink:0;font-size:0.65rem;margin-top:2px}
/* Badge cloud */
.d-badges{display:flex;flex-wrap:wrap;gap:4px}
.d-badge{font-family:var(--mono);font-size:0.63rem;padding:3px 8px;border-radius:4px;background:var(--bg3);border:1px solid var(--border);color:var(--text2);transition:all 0.2s;cursor:default}
.d-badge:hover{border-color:var(--accent);color:var(--white)}
/* Code sample box: white-space:pre keeps the sample's own line breaks and spaces; long lines scroll sideways */
.d-code{background:var(--bg);border:1px solid var(--border);border-radius:7px;padding:12px 14px;margin-top:6px;font-family:var(--mono);font-size:0.64rem;line-height:1.75;color:var(--text2);overflow-x:auto;white-space:pre}
/* Highlight classes used by the sample markup: k, s, c, f, t (each maps to one colour) */
.d-code .k{color:var(--accent)} .d-code .s{color:var(--green)} .d-code .c{color:var(--text3);font-style:italic} .d-code .f{color:var(--amber)} .d-code .t{color:var(--cyan)}
/* Pattern diagram box; the contained SVG stretches to the full width */
.d-pattern{background:var(--bg);border:1px solid var(--border);border-radius:7px;padding:14px;margin-top:6px}
.d-pattern svg{width:100%;display:block}
/* Empty-state placeholder, centred in a 350px-high area */
.empty{display:flex;flex-direction:column;align-items:center;justify-content:center;height:350px;color:var(--text3);gap:8px;text-align:center}
.empty i{font-size:2rem;opacity:0.2}
.empty span{font-size:0.8rem}
/* ═══ TRACE BAR ═══ */
/* Status bar pinned to the bottom of the viewport; hidden until it receives the .show class */
.trace-bar {
  position: fixed;
  bottom: 0;
  left: 0;
  right: 0;
  background: var(--bg2);
  border-top: 1px solid var(--border);
  padding: 9px 28px;
  display: none;
  align-items: center;
  gap: 14px;
  z-index: 100;
  font-family: var(--mono);
  font-size: 0.68rem;
}

.trace-bar.show {
  display: flex;
}

/* Progress track along the bar's top edge */
.trace-bar .t-prog {
  height: 3px;
  background: var(--border);
  border-radius: 2px;
  position: absolute;
  top: 0;
  left: 0;
  right: 0;
}

/* Progress fill: starts at 0% and animates width changes */
.trace-bar .t-fill {
  height: 100%;
  background: linear-gradient(90deg,var(--accent),var(--accent2));
  border-radius: 2px;
  transition: width 0.35s;
  width: 0%;
}

.trace-bar .t-lbl {
  color: var(--accent);
  font-weight: 600;
}

/* Description takes all remaining width */
.trace-bar .t-desc {
  color: var(--text2);
  flex: 1;
}

.trace-bar .t-time {
  color: var(--text3);
}
/* stagger */
/* Entrance animation: fade in while sliding up 10px.
   Only a `from` keyframe is declared, so each element animates towards its own computed
   opacity/transform (for example .devprac ends at its 0.7 opacity rather than 1). */
@keyframes sUp {
  from {
    opacity: 0;
    transform: translateY(10px);
  }
}

/* `backwards` applies the `from` state during the per-element delay but releases the properties
   when the animation ends. With `both`/`forwards` the final keyframe stays applied indefinitely and,
   because animated values beat normal declarations in the cascade, it would override the
   :hover/.sel translateX() offsets and the resting opacity of .devprac. */
.lyr,
.xbar,
.lifecycle,
.devprac {
  animation: sUp 0.35s ease backwards;
}

/* Cross-cutting bars appear left to right */
.xbar:nth-child(1) { animation-delay: .05s; }
.xbar:nth-child(2) { animation-delay: .1s; }
.xbar:nth-child(3) { animation-delay: .15s; }
.xbar:nth-child(4) { animation-delay: .2s; }

/* Layer rows sit at odd child positions because a .fcon connector follows each one */
.lyr:nth-child(1) { animation-delay: .12s; }
.lyr:nth-child(3) { animation-delay: .18s; }
.lyr:nth-child(5) { animation-delay: .24s; }
.lyr:nth-child(7) { animation-delay: .30s; }
.lyr:nth-child(9) { animation-delay: .36s; }

/* Skip the entrance motion for users who ask for reduced motion */
@media (prefers-reduced-motion: reduce) {
  .lyr,
  .xbar,
  .lifecycle,
  .devprac {
    animation: none;
  }
}
</style>
</head>
<body>
<!-- Page header: logo mark, title, and keyboard shortcut hints.
     NOTE(review): the two "nav" key caps were empty in the markup; Arrow Up / Arrow Down are assumed
     because the layers are stacked vertically. Confirm against the keydown handler in the script. -->
<header class="hdr">
  <div class="hdr-mark" aria-hidden="true">AI</div>
  <h1><b>//</b> AI System Architecture <b>·</b> Reference Blueprint</h1>
  <div class="hdr-r">
    <span><kbd>↑</kbd><kbd>↓</kbd> nav</span>
    <span><kbd>Enter</kbd> tab</span>
    <span><kbd>T</kbd> trace</span>
  </div>
</header>
<!-- Trace toolbar: each button starts one scripted walkthrough by calling runTrace() with a trace
     definition; both are defined in the page script. type="button" makes the buttons inert if the
     toolbar is ever placed inside a form, and the icon glyphs are hidden from assistive technology
     because the visible label already names the action. -->
<div class="toolbar">
  <button class="tb" type="button" onclick="runTrace(traceReq)"><i class="fas fa-route" aria-hidden="true"></i> Trace Request</button>
  <button class="tb" type="button" onclick="runTrace(traceRag)"><i class="fas fa-magnifying-glass" aria-hidden="true"></i> RAG Flow</button>
  <button class="tb" type="button" onclick="runTrace(traceAgent)"><i class="fas fa-robot" aria-hidden="true"></i> Agent Loop</button>
  <div class="tb-sep"></div>
  <button class="tb" type="button" onclick="runTrace(traceFeedback)"><i class="fas fa-arrows-spin" aria-hidden="true"></i> Feedback Loop</button>
</div>
<div class="main">
<!-- ═══ VIZ ═══ -->
<div class="viz">
<div class="arch">
<!-- CROSS-CUTTING BARS (left): one bar per concern that spans every layer.
     data-key names the section; --xc supplies the bar colour.
     The dot and icon are decorative (the text label names the bar), so they are hidden from
     assistive technology.
     NOTE(review): the bars are clickable <div>s and cannot be reached with Tab; making them real
     buttons needs matching changes in the script's event wiring, which is not visible here. -->
<div class="xcut">
  <div class="xbar" data-key="security" style="--xc:var(--c-sec)">
    <span class="xdot" aria-hidden="true"></span>
    <i class="fas fa-shield-halved xicon" aria-hidden="true"></i>
    <span class="xlabel">Security</span>
  </div>
  <div class="xbar" data-key="observability" style="--xc:var(--c-obs)">
    <span class="xdot" aria-hidden="true"></span>
    <i class="fas fa-chart-line xicon" aria-hidden="true"></i>
    <span class="xlabel">Observability</span>
  </div>
  <div class="xbar" data-key="configuration" style="--xc:var(--c-conf)">
    <span class="xdot" aria-hidden="true"></span>
    <i class="fas fa-gear xicon" aria-hidden="true"></i>
    <span class="xlabel">Config</span>
  </div>
  <div class="xbar" data-key="memory" style="--xc:var(--c-mem)">
    <span class="xdot" aria-hidden="true"></span>
    <i class="fas fa-brain xicon" aria-hidden="true"></i>
    <span class="xlabel">Memory</span>
  </div>
</div>
<!-- CORE STACK (right) -->
<div class="stack">
<!-- Core layers, listed from the user-facing layer down to infrastructure.
     data-key names the section; --lc supplies the row colour.
     Each row is followed by a .fcon connector, so rows occupy odd child positions (the staggered
     entrance delays in the stylesheet rely on that).
     Icons are decorative next to the layer name and are hidden from assistive technology. -->
<div class="lyr" data-key="presentation" style="--lc:var(--c-present)">
  <div class="ico"><i class="fas fa-desktop" aria-hidden="true"></i></div>
  <span class="nm">Presentation <small>UI · API · CLI</small></span>
  <div class="tgs"><span>Gradio</span><span>Vue</span><span>Chainlit</span><span>SSE</span><span>CLI</span></div>
</div>
<div class="fcon"></div>
<div class="lyr" data-key="orchestration" style="--lc:var(--c-orch)">
  <div class="ico"><i class="fas fa-sitemap" aria-hidden="true"></i></div>
  <span class="nm">Orchestration &amp; Agents</span>
  <div class="tgs"><span>smolagents</span><span>CrewAI</span><span>LangGraph</span><span>MCP</span><span>A2A</span></div>
</div>
<div class="fcon"></div>
<div class="lyr" data-key="llm" style="--lc:var(--c-llm)">
  <div class="ico"><i class="fas fa-microchip" aria-hidden="true"></i></div>
  <span class="nm">LLM Services <small>inference · embed · cache</small></span>
  <div class="tgs"><span>Claude</span><span>DeepSeek</span><span>Kimi</span><span>Llama</span><span>LoRA</span></div>
</div>
<div class="fcon"></div>
<div class="lyr" data-key="data" style="--lc:var(--c-data)">
  <div class="ico"><i class="fas fa-database" aria-hidden="true"></i></div>
  <span class="nm">Data &amp; Retrieval <small>store · pipeline · KG</small></span>
  <div class="tgs"><span>pgvector</span><span>ChromaDB</span><span>Neo4j</span><span>MinIO</span><span>ETL</span></div>
</div>
<div class="fcon"></div>
<div class="lyr" data-key="infrastructure" style="--lc:var(--c-infra)">
  <div class="ico"><i class="fas fa-server" aria-hidden="true"></i></div>
  <span class="nm">Infrastructure <small>compute · deploy · scale</small></span>
  <div class="tgs"><span>Docker</span><span>K8s</span><span>Terraform</span><span>GPU</span><span>CI/CD</span></div>
</div>
<!-- LIFECYCLE LOOP: the continuous-improvement cycle shown as step chips joined by arrows.
     The .arr connector spans were empty, so the steps rendered with nothing between them; the
     arrow glyphs are restored here and hidden from assistive technology (the step names carry
     the meaning).
     NOTE(review): the closing glyph (↻, "back to the start") is an assumption; adjust it if the
     original design used a different symbol. -->
<div class="lifecycle" data-key="lifecycle">
  <i class="fas fa-arrows-spin lf-icon" aria-hidden="true"></i>
  <span class="lf-label">Continuous Improvement</span>
  <div class="lf-steps">
    <span>Feedback</span><span class="arr" aria-hidden="true">→</span>
    <span>Eval</span><span class="arr" aria-hidden="true">→</span>
    <span>Fine-tune</span><span class="arr" aria-hidden="true">→</span>
    <span>Deploy</span><span class="arr" aria-hidden="true">→</span>
    <span>Monitor</span><span class="arr" aria-hidden="true">→</span>
    <span class="arr" aria-hidden="true">↻</span>
  </div>
</div>
<!-- DEV PRACTICES: engineering practices that apply across the whole stack.
     data-key names the section. The icon is decorative next to the label and is hidden from
     assistive technology. -->
<div class="devprac" data-key="devpractices">
  <i class="fas fa-code dp-icon" aria-hidden="true"></i>
  <span class="dp-label">Dev Practices</span>
  <span class="dp-sub">Testing · CI/CD · Code Review · Documentation · LLM Eval</span>
</div>
</div>
</div>
</div>
<!-- ═══ DETAIL PANEL ═══ -->
<!-- Four tabs (data-tab: overview, stack, code, patterns) above a body that starts out with an
     empty-state hint. -->
<div class="det">
  <div class="det-tabs">
    <div class="dt act" data-tab="overview">Overview</div>
    <div class="dt" data-tab="stack">Stack</div>
    <div class="dt" data-tab="code">Code</div>
    <div class="dt" data-tab="patterns">Patterns</div>
  </div>
  <div id="detBody" class="det-body">
    <div class="empty">
      <i class="fas fa-layer-group"></i>
      <span>Click any layer or bar to explore<br>Press <kbd style="font-family:var(--mono);font-size:0.65rem;padding:1px 4px;border:1px solid var(--border2);border-radius:3px;background:var(--bg2)">T</kbd> to trace a request</span>
    </div>
  </div>
</div>
</div>
<!-- TRACE BAR -->
<!-- Bottom status bar: progress track with its fill, a dot icon, then label, description and
     elapsed-time slots. The ids are the hooks for filling them in. -->
<div id="traceBar" class="trace-bar">
  <div class="t-prog">
    <div id="tFill" class="t-fill"></div>
  </div>
  <i class="fas fa-circle-dot" style="color:var(--accent);font-size:0.55rem"></i>
  <span id="tLbl" class="t-lbl"></span>
  <span id="tDesc" class="t-desc"></span>
  <span id="tTime" class="t-time">0ms</span>
</div>
<script>
// ═══════════════════════════════════════════════════════
// DATA — all 11 sections
// ═══════════════════════════════════════════════════════
const D = {
// ─── CORE STACK ───────────────────────────────────────
presentation: {
title:"Presentation", color:"var(--c-present)",
overview:{
purpose:"Every interface a user or system touches — browser UIs, chat, voice, API endpoints, CLI tools, mobile SDKs. Not all consumers are humans with browsers.",
components:[
"Web frameworks — Vue 3, React, Svelte, Astro, plain HTML+Alpine",
"AI-native UIs — Gradio, Streamlit, Chainlit, Open WebUI",
"Browser agents — OpenClaw / PicoClaw (single-file HTML + WebGPU/WebLLM)",
"API surfaces — REST, GraphQL, gRPC endpoints for machine consumers",
"CLI / Terminal — Claude Code pattern, click-based TUIs, REPL interfaces",
"Multi-modal I/O — text, voice (Whisper STT / TTS), image, video, PDF",
"Real-time — WebSockets, SSE (server-sent events), streaming token delivery",
"Mobile / native — React Native, Flutter, native SDKs wrapping the API layer"
]
},
stack:["Vue 3","React","Svelte","Gradio","Streamlit","Chainlit","Open WebUI","OpenClaw","WebSockets","SSE","Whisper","FastAPI","GraphQL","gRPC","CLI"],
code:`<span class="c">&lt;!-- OpenClaw: single-file browser agent (WebGPU) --&gt;</span>
<span class="k">&lt;script</span> type=<span class="s">"module"</span><span class="k">&gt;</span>
<span class="k">import</span> { CreateMLCEngine } <span class="k">from</span>
<span class="s">"https://esm.run/@mlc-ai/web-llm"</span>;
<span class="c">// Hermes-3 running entirely client-side</span>
<span class="k">const</span> engine = <span class="k">await</span> CreateMLCEngine(
<span class="s">"Hermes-3-Llama-3.1-8B-q4f16_1-MLC"</span>,
{ initProgressCallback: p =>
status.textContent = p.text }
);
<span class="c">// stream response with tool-calling</span>
<span class="k">const</span> stream = <span class="k">await</span> engine.chat.completions
.create({
messages: conversation,
stream: <span class="f">true</span>,
tools: toolDefinitions,
});
<span class="k">for await</span> (<span class="k">const</span> chunk <span class="k">of</span> stream) {
output.textContent +=
chunk.choices[<span class="f">0</span>]?.delta?.content || <span class="s">""</span>;
}
<span class="k">&lt;/script&gt;</span>`,
patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#ef5350" font-size="8" font-family="IBM Plex Mono" font-weight="600">Multi-Surface Architecture</text>
<rect x="8" y="24" width="60" height="55" rx="4" fill="none" stroke="#ef5350" stroke-width="1.2"/>
<text x="38" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Browser</text>
<text x="38" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Chat UI</text>
<rect x="76" y="24" width="60" height="55" rx="4" fill="none" stroke="#ff8a50" stroke-width="1.2"/>
<text x="106" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">CLI</text>
<text x="106" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Terminal</text>
<rect x="144" y="24" width="60" height="55" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/>
<text x="174" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">API</text>
<text x="174" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">REST/gRPC</text>
<rect x="212" y="24" width="60" height="55" rx="4" fill="none" stroke="#22c982" stroke-width="1.2"/>
<text x="242" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Mobile</text>
<text x="242" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">SDK</text>
<rect x="280" y="24" width="52" height="55" rx="4" fill="none" stroke="#00d4e6" stroke-width="1.2"/>
<text x="306" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Voice</text>
<text x="306" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Whisper</text>
</svg>`
},
orchestration: {
title:"Orchestration & Agents", color:"var(--c-orch)",
overview:{
purpose:"The brain — agent lifecycle, multi-step reasoning (ReAct), RAG pipelines, tool routing, workflow orchestration, human-in-the-loop, retries & circuit breakers.",
components:[
"Agent frameworks — smolagents (HF), CrewAI, AutoGen, Agent Zero, Hermes function-calling",
"Workflow engines — LangGraph, Prefect, Temporal, Airflow",
"RAG pipeline — chunking strategy → embed → retrieve → rerank → generate",
"Tool routing — MCP servers, OpenAI-style function calling, A2A (Agent-to-Agent protocol)",
"Prompt management — versioning, templates, A/B testing, dynamic few-shot selection",
"Context window management — token budgeting, sliding window, summarization",
"Resilience — retry with exponential backoff, circuit breakers, fallback chains, timeout policies",
"Human-in-the-loop — approval gates, escalation, confidence thresholds"
]
},
stack:["smolagents","CrewAI","AutoGen","Agent Zero","Hermes","LangGraph","Prefect","Temporal","LangChain","MCP","A2A","Haystack","DSPy"],
code:`<span class="c"># smolagents: multi-step code agent with MCP tools</span>
<span class="k">from</span> smolagents <span class="k">import</span> (
CodeAgent, HfApiModel, tool
)
<span class="t">@tool</span>
<span class="k">def</span> <span class="f">search_catalog</span>(query: str) -> str:
<span class="s">"""Search product catalog."""</span>
<span class="k">return</span> catalog.search(query, limit=<span class="f">10</span>)
<span class="t">@tool</span>
<span class="k">def</span> <span class="f">check_gdpr_status</span>(
system: str, record_id: str
) -> str:
<span class="s">"""Check deletion status."""</span>
<span class="k">return</span> deltionprotovcoll.check(system, record_id)
model = HfApiModel(<span class="s">"Qwen/Qwen2.5-72B"</span>)
agent = CodeAgent(
tools=[search_catalog, check_gdpr_status],
model=model,
max_steps=<span class="f">8</span>,
planning_interval=<span class="f">3</span>,
)
<span class="c"># Agent will: plan → search → observe → check → report</span>
result = agent.run(
<span class="s">"Find frozen pizza products, check their"</span>
<span class="s">" price in SAP"</span>
)`,
patterns:`<svg viewBox="0 0 340 110" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#22c982" font-size="8" font-family="IBM Plex Mono" font-weight="600">ReAct Agent Loop + Circuit Breaker</text>
<rect x="120" y="24" width="80" height="26" rx="5" fill="none" stroke="#22c982" stroke-width="1.3"/>
<text x="160" y="41" text-anchor="middle" fill="#c8d0e0" font-size="8" font-family="IBM Plex Mono">Think/Plan</text>
<path d="M200 37 Q240 37 240 57 Q240 77 200 77" fill="none" stroke="#4a90ff" stroke-width="1"/>
<text x="255" y="60" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">Act (tools)</text>
<rect x="120" y="64" width="80" height="26" rx="5" fill="none" stroke="#f5a623" stroke-width="1.3"/>
<text x="160" y="81" text-anchor="middle" fill="#c8d0e0" font-size="8" font-family="IBM Plex Mono">Observe</text>
<path d="M120 77 Q80 77 80 57 Q80 37 120 37" fill="none" stroke="#f06292" stroke-width="1"/>
<text x="42" y="60" fill="#f06292" font-size="7" font-family="IBM Plex Mono">Loop</text>
<rect x="120" y="93" width="80" height="14" rx="3" fill="none" stroke="#ef5350" stroke-width="0.8" stroke-dasharray="3"/>
<text x="160" y="103" text-anchor="middle" fill="#ef5350" font-size="6" font-family="IBM Plex Mono">circuit breaker / timeout</text>
</svg>`
},
llm: {
title:"LLM Services", color:"var(--c-llm)",
overview:{
purpose:"Three operationally distinct sub-services: real-time inference (low latency, auto-scale), embedding service (high throughput, batch), offline fine-tuning (GPU-intensive, scheduled). Each scales differently.",
components:[
"ᴿᵀ Inference — Commercial: Claude, GPT-4o, Gemini 2, Kimi (Moonshot); Open: DeepSeek-V3/R1, Llama 3.x, Mistral, Qwen 2.5",
"ᴿᵀ Inference — Local: LM Studio, Ollama, vLLM, TGI, llama.cpp",
"ᴿᵀ Prompt caching — Claude prompt caching, Gemini context caching (massive cost reduction)",
"ᴿᵀ Structured output — JSON mode, tool use schemas, constrained generation (Outlines, LMQL)",
"ᴿᵀ Multimodal — vision (image→text), audio (Whisper), PDF parsing, video understanding",
"ᴱᴹᴮ Embedding service — BGE-M3, Nomic-embed, OpenAI text-embedding-3, sentence-transformers",
"ᶠᵀ Fine-tuning — LoRA, QLoRA, full SFT, DPO/RLHF, Unsloth, axolotl",
"ᶠᵀ Quantization — GGUF, AWQ, GPTQ, BitNet 1.58b, EXL2",
"Routing — litellm, OpenRouter, latency-based + cost-based routing, fallback chains"
]
},
stack:["Claude","GPT-4o","Gemini 2","Kimi","DeepSeek-V3","Llama 3","Mistral","Qwen 2.5","LM Studio","Ollama","vLLM","TGI","litellm","BGE-M3","Nomic","LoRA","QLoRA","Unsloth","BitNet","Outlines","LMQL"],
code:`<span class="c"># litellm: multi-provider routing + caching</span>
<span class="k">from</span> litellm <span class="k">import</span> Router
router = Router(model_list=[
{<span class="s">"model_name"</span>: <span class="s">"best"</span>,
<span class="s">"litellm_params"</span>: {
<span class="s">"model"</span>: <span class="s">"claude-sonnet-4-20250514"</span>,
<span class="s">"cache"</span>: {<span class="s">"type"</span>: <span class="s">"prompt_caching"</span>}}},
{<span class="s">"model_name"</span>: <span class="s">"best"</span>,
<span class="s">"litellm_params"</span>: {
<span class="s">"model"</span>: <span class="s">"deepseek/deepseek-chat"</span>}},
{<span class="s">"model_name"</span>: <span class="s">"local"</span>,
<span class="s">"litellm_params"</span>: {
<span class="s">"model"</span>: <span class="s">"ollama/llama3.1"</span>,
<span class="s">"api_base"</span>: <span class="s">"http://192.168.188.25:1234"</span>}},
], routing_strategy=<span class="s">"latency-based-routing"</span>)
<span class="c"># structured output (JSON mode)</span>
resp = <span class="k">await</span> router.acompletion(
model=<span class="s">"best"</span>,
messages=[{<span class="s">"role"</span>:<span class="s">"user"</span>, <span class="s">"content"</span>:prompt}],
response_format={<span class="s">"type"</span>: <span class="s">"json_object"</span>},
)`,
patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#7c5cfc" font-size="8" font-family="IBM Plex Mono" font-weight="600">Three Sub-Services (different scaling profiles)</text>
<rect x="8" y="24" width="100" height="65" rx="5" fill="none" stroke="#7c5cfc" stroke-width="1.2"/>
<text x="58" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">RT Inference</text>
<text x="58" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">low latency</text>
<text x="58" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">auto-scale</text>
<text x="58" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">prompt cache</text>
<text x="58" y="82" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">multi-modal</text>
<rect x="118" y="24" width="100" height="65" rx="5" fill="none" stroke="#22c982" stroke-width="1.2"/>
<text x="168" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">Embedding Svc</text>
<text x="168" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">high throughput</text>
<text x="168" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">batch-friendly</text>
<text x="168" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">CPU or GPU</text>
<rect x="228" y="24" width="100" height="65" rx="5" fill="none" stroke="#f5a623" stroke-width="1.2"/>
<text x="278" y="40" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono" font-weight="600">Fine-tuning</text>
<text x="278" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">GPU-intensive</text>
<text x="278" y="62" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">scheduled/batch</text>
<text x="278" y="72" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LoRA/QLoRA</text>
<text x="278" y="82" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">quantization</text>
</svg>`
},
data: {
title:"Data & Retrieval", color:"var(--c-data)",
overview:{
purpose:"Storage AND the pipeline that feeds it. Covers relational, vector, document, graph, cache, object storage, plus the ingestion/chunking/ETL pipeline that most RAG projects actually break on.",
components:[
"Relational — PostgreSQL, MySQL, SQLite",
"Vector stores — pgvector, ChromaDB, Qdrant, Weaviate, Pinecone",
"Graph — Neo4j, ArangoDB (knowledge graphs, entity relationships)",
"Document — MongoDB, Elasticsearch (full-text + BM25)",
"Cache — Redis, DragonflyDB, Memcached (hot tier)",
"Object/File — MinIO (S3-compat, S3 Object Lock for WORM/GDPR)",
"Ingestion pipeline — web scraping, XML/RSS feeds, PDF parsing, OCR",
"Chunking strategies — fixed-size, semantic, parent-child, recursive",
"Data versioning — DVC, lakeFS (reproducible experiments)",
"Storage tiers — hot (Redis) → warm (PostgreSQL) → cold (S3/MinIO)"
]
},
stack:["PostgreSQL","pgvector","ChromaDB","Qdrant","Weaviate","Neo4j","MongoDB","Elasticsearch","Redis","DragonflyDB","MinIO","DVC","lakeFS","Unstructured","LlamaParse"],
code:`<span class="c">-- pgvector: hybrid search (vector + BM25)</span>
<span class="k">WITH</span> semantic <span class="k">AS</span> (
<span class="k">SELECT</span> id, content, metadata,
embedding <span class="k">&lt;=&gt;</span> <span class="s">$1</span> <span class="k">AS</span> vec_dist,
ts_rank(tsv, plainto_tsquery(<span class="s">$2</span>)) <span class="k">AS</span> bm25
<span class="k">FROM</span> documents
<span class="k">WHERE</span> embedding <span class="k">&lt;=&gt;</span> <span class="s">$1</span> &lt; <span class="f">0.8</span>
<span class="k">AND</span> tenant_id = <span class="s">$3</span> <span class="c">-- data residency</span>
)
<span class="k">SELECT</span> *, (<span class="f">0.7</span> * (<span class="f">1</span>-vec_dist) + <span class="f">0.3</span> * bm25)
<span class="k">AS</span> score
<span class="k">FROM</span> semantic <span class="k">ORDER BY</span> score <span class="k">DESC</span>
<span class="k">LIMIT</span> <span class="f">10</span>;
<span class="c">-- Knowledge graph enrichment</span>
<span class="c">-- MATCH (p:Product)-[:HAS_CATEGORY]->(c)</span>
<span class="c">-- WHERE p.name =~ '.*Pizza.*'</span>
<span class="c">-- RETURN p, c, p.gdpr_status</span>`,
patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#f5a623" font-size="8" font-family="IBM Plex Mono" font-weight="600">Ingestion Pipeline + Hybrid Retrieval</text>
<rect x="8" y="26" width="50" height="24" rx="4" fill="none" stroke="#6a7a9b" stroke-width="1"/>
<text x="33" y="42" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Source</text>
<line x1="58" y1="38" x2="72" y2="38" stroke="#2a3a5c" stroke-width="1"/>
<rect x="72" y="26" width="50" height="24" rx="4" fill="none" stroke="#ff8a50" stroke-width="1"/>
<text x="97" y="42" text-anchor="middle" fill="#ff8a50" font-size="6" font-family="IBM Plex Mono">Parse</text>
<line x1="122" y1="38" x2="136" y2="38" stroke="#2a3a5c" stroke-width="1"/>
<rect x="136" y="26" width="50" height="24" rx="4" fill="none" stroke="#22c982" stroke-width="1"/>
<text x="161" y="42" text-anchor="middle" fill="#22c982" font-size="6" font-family="IBM Plex Mono">Chunk</text>
<line x1="186" y1="38" x2="200" y2="38" stroke="#2a3a5c" stroke-width="1"/>
<rect x="200" y="26" width="50" height="24" rx="4" fill="none" stroke="#7c5cfc" stroke-width="1"/>
<text x="225" y="42" text-anchor="middle" fill="#7c5cfc" font-size="6" font-family="IBM Plex Mono">Embed</text>
<line x1="250" y1="38" x2="264" y2="38" stroke="#2a3a5c" stroke-width="1"/>
<rect x="264" y="26" width="62" height="24" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/>
<text x="295" y="42" text-anchor="middle" fill="#f5a623" font-size="6" font-family="IBM Plex Mono">Store</text>
<text x="170" y="70" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">↑ This pipeline is where most RAG projects break</text>
<text x="170" y="82" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">Hot (Redis) → Warm (PostgreSQL) → Cold (MinIO/S3)</text>
</svg>`
},
infrastructure: {
title:"Infrastructure", color:"var(--c-infra)",
overview:{
purpose:"Without this layer, nothing runs. Compute provisioning, containerization, orchestration, GPU management, networking, CI/CD deployment pipelines.",
components:[
"Containers — Docker, Podman, OCI images",
"Orchestration — Kubernetes, Docker Compose, Nomad",
"IaC — Terraform, Pulumi, Ansible, CloudFormation",
"GPU provisioning — NVIDIA Container Toolkit, MIG, time-slicing",
"CI/CD — GitHub Actions, GitLab CI, ArgoCD, Flux",
"Networking — Ingress (Nginx, Traefik), service mesh (Istio, Linkerd)",
"Registries — Harbor, ECR, GHCR (container images + model artifacts)",
"Environments — dev → staging → prod with promotion gates"
]
},
stack:["Docker","Kubernetes","Terraform","Pulumi","Ansible","GitHub Actions","ArgoCD","NVIDIA Container Toolkit","Traefik","Nginx","Istio","Harbor"],
code:`<span class="c"># docker-compose.yml — AI platform stack</span>
<span class="k">services</span>:
<span class="k">api</span>:
image: <span class="s">ai-platform/api:latest</span>
environment:
- <span class="s">LLM_PROVIDER=litellm</span>
- <span class="s">VECTOR_DB=pgvector</span>
depends_on: [postgres, redis, minio]
<span class="k">vllm</span>:
image: <span class="s">vllm/vllm-openai:latest</span>
deploy:
resources:
reservations:
devices:
- driver: <span class="s">nvidia</span>
count: <span class="f">1</span>
capabilities: [<span class="s">gpu</span>]
command: >
--model <span class="s">meta-llama/Llama-3.1-8B</span>
--max-model-len <span class="f">8192</span>
--gpu-memory-utilization <span class="f">0.9</span>
<span class="k">postgres</span>:
image: <span class="s">pgvector/pgvector:pg16</span>
volumes: [<span class="s">pg_data:/var/lib/postgresql/data</span>]
<span class="k">minio</span>:
image: <span class="s">minio/minio:latest</span>
command: <span class="s">server /data --console-address ":9001"</span>
environment:
- <span class="s">MINIO_OBJECT_LOCKING=on</span> <span class="c"># WORM for GDPR</span>`,
patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#38bdf8" font-size="8" font-family="IBM Plex Mono" font-weight="600">Deployment Pipeline</text>
<rect x="8" y="28" width="55" height="22" rx="4" fill="none" stroke="#6a7a9b" stroke-width="1"/>
<text x="35" y="43" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">git push</text>
<line x1="63" y1="39" x2="78" y2="39" stroke="#2a3a5c" stroke-width="1"/>
<rect x="78" y="28" width="55" height="22" rx="4" fill="none" stroke="#ff8a50" stroke-width="1"/>
<text x="105" y="43" text-anchor="middle" fill="#ff8a50" font-size="6" font-family="IBM Plex Mono">CI test</text>
<line x1="133" y1="39" x2="148" y2="39" stroke="#2a3a5c" stroke-width="1"/>
<rect x="148" y="28" width="55" height="22" rx="4" fill="none" stroke="#7c5cfc" stroke-width="1"/>
<text x="175" y="43" text-anchor="middle" fill="#7c5cfc" font-size="6" font-family="IBM Plex Mono">Build</text>
<line x1="203" y1="39" x2="218" y2="39" stroke="#2a3a5c" stroke-width="1"/>
<rect x="218" y="28" width="55" height="22" rx="4" fill="none" stroke="#22c982" stroke-width="1"/>
<text x="245" y="43" text-anchor="middle" fill="#22c982" font-size="6" font-family="IBM Plex Mono">Deploy</text>
<line x1="273" y1="39" x2="288" y2="39" stroke="#2a3a5c" stroke-width="1"/>
<rect x="288" y="28" width="44" height="22" rx="4" fill="none" stroke="#38bdf8" stroke-width="1.2"/>
<text x="310" y="43" text-anchor="middle" fill="#38bdf8" font-size="6" font-family="IBM Plex Mono">Prod</text>
<text x="170" y="70" text-anchor="middle" fill="#3a4a6b" font-size="7" font-family="IBM Plex Mono">DEV → STAGING → PROD with promotion gates</text>
</svg>`
},
// ─── CROSS-CUTTING ────────────────────────────────────
security: {
title:"Security & Compliance", color:"var(--c-sec)",
overview:{
purpose:"Applied AT every layer independently — auth at the API gateway, PII redaction in orchestration, output validation at presentation, GDPR compliance in data, cost controls per user.",
components:[
"Authentication — OAuth2, OIDC, JWT, SAML, Keycloak",
"Authorization — RBAC, ABAC, OPA policies, row-level security",
"AI Safety — Llama Guard 3, NeMo Guardrails, Claude built-in moderation",
"PII detection & redaction — Presidio, custom NER, regex (multi-language DE/NL/FR)",
"Prompt injection defense — input sanitization, output validation, sandwich defense",
"GDPR compliance — workflows, audit trails, data residency per country",
"Data residency / sovereignty — DE vs NL vs FR data routing (critical for multi-country)",
"Rate limiting & abuse prevention — per-user, per-model, per-endpoint",
"Cost controls — budget caps per user/team, model access policies",
"Content moderation — toxicity, bias, factuality scoring"
]
},
stack:["OAuth2","JWT","Keycloak","OPA","Llama Guard 3","NeMo Guardrails","Presidio","RBAC","ABAC","GDPR","S3 Object Lock"],
code:`<span class="c"># multi-layer security: applied at each layer</span>
<span class="c"># ① Presentation: rate limit</span>
<span class="t">@app.middleware</span>(<span class="s">"http"</span>)
<span class="k">async def</span> <span class="f">rate_limit</span>(request, call_next):
user = get_user(request)
<span class="k">if</span> <span class="k">await</span> redis.incr(f<span class="s">"rl:{user.id}"</span>) > <span class="f">100</span>:
<span class="k">raise</span> HTTPException(<span class="f">429</span>)
<span class="k">return await</span> call_next(request)
<span class="c"># ② Orchestration: PII redaction</span>
analyzer = AnalyzerEngine()
results = analyzer.analyze(user_input, <span class="s">"de"</span>)
redacted = anonymize(user_input, results)
<span class="c"># ③ LLM: guardrails</span>
rails = LLMRails(guardrails_config)
safe = <span class="k">await</span> rails.generate(messages)
<span class="c"># ④ Data: residency routing</span>
db = get_db_for_country(user.country)
<span class="c"># DE → eu-central, NL → eu-west, FR → eu-west</span>`,
patterns:`<svg viewBox="0 0 340 100" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#f06292" font-size="8" font-family="IBM Plex Mono" font-weight="600">Security Applied Per Layer (not wrapping)</text>
<line x1="18" y1="25" x2="18" y2="90" stroke="#f06292" stroke-width="1.5" opacity="0.5"/>
<rect x="30" y="24" width="290" height="14" rx="3" fill="none" stroke="#ef5350" stroke-width="0.8"/>
<text x="175" y="34" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Presentation: rate limit, auth, CORS</text>
<rect x="30" y="42" width="290" height="14" rx="3" fill="none" stroke="#22c982" stroke-width="0.8"/>
<text x="175" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Orchestration: PII redaction, prompt injection defense</text>
<rect x="30" y="60" width="290" height="14" rx="3" fill="none" stroke="#7c5cfc" stroke-width="0.8"/>
<text x="175" y="70" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LLM: guardrails, output validation, cost caps</text>
<rect x="30" y="78" width="290" height="14" rx="3" fill="none" stroke="#f5a623" stroke-width="0.8"/>
<text x="175" y="88" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Data: encryption, residency, GDPR, EU AI Act</text>
</svg>`
},
observability: {
title:"Observability", color:"var(--c-obs)",
overview:{
purpose:"Instruments INSIDE each layer — not around it. LLM-specific traces (token cost, latency, quality), system metrics, distributed tracing, cost analytics, alerting.",
components:[
"LLM Observability — LangSmith, LangFuse, Helicone (cost/token/latency per call)",
"Infrastructure metrics — Prometheus + Grafana dashboards",
"Distributed tracing — OpenTelemetry, Jaeger, Tempo (end-to-end request tracing)",
"Log aggregation — Loki, ELK, Datadog logs",
"Cost analytics — per-model, per-user, per-feature cost breakdowns",
"Quality metrics — RAGAS scores, hallucination rate, retrieval precision/recall",
"Alerting — PagerDuty, Opsgenie, Slack (latency SLA, error rate, cost spike)",
"Custom dashboards — model comparison, A/B test results, drift detection"
]
},
stack:["LangSmith","LangFuse","Helicone","Prometheus","Grafana","OpenTelemetry","Jaeger","Loki","Datadog","Tempo","PagerDuty"],
code:`<span class="c"># observability: instrumented at each layer</span>
<span class="k">from</span> langfuse.decorators <span class="k">import</span> observe
<span class="k">from</span> opentelemetry <span class="k">import</span> trace
<span class="k">from</span> prometheus_client <span class="k">import</span> Histogram
tracer = trace.get_tracer(<span class="s">"ai-platform"</span>)
llm_lat = Histogram(<span class="s">"llm_seconds"</span>, <span class="s">"LLM latency"</span>,
[<span class="s">"model"</span>,<span class="s">"provider"</span>])
<span class="t">@observe</span>(name=<span class="s">"rag-pipeline"</span>)
<span class="k">async def</span> <span class="f">answer</span>(query):
<span class="k">with</span> tracer.start_as_current_span(<span class="s">"retrieve"</span>):
chunks = <span class="k">await</span> retrieve(query)
<span class="k">with</span> tracer.start_as_current_span(<span class="s">"generate"</span>):
<span class="k">with</span> llm_lat.labels(<span class="s">"claude"</span>,<span class="s">"anthropic"</span>)\\
.time():
resp = <span class="k">await</span> generate(query, chunks)
<span class="c"># auto: tokens, cost, latency → LangFuse</span>
<span class="c"># auto: spans → Jaeger via OTel</span>
<span class="c"># auto: llm_seconds → Prometheus → Grafana</span>
<span class="k">return</span> resp`,
patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#9c7cfc" font-size="8" font-family="IBM Plex Mono" font-weight="600">Observability Stack (Three Pillars + LLM)</text>
<rect x="8" y="26" width="75" height="50" rx="5" fill="none" stroke="#9c7cfc" stroke-width="1"/>
<text x="45" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Metrics</text>
<text x="45" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Prometheus</text>
<text x="45" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Grafana</text>
<rect x="92" y="26" width="75" height="50" rx="5" fill="none" stroke="#4a90ff" stroke-width="1"/>
<text x="129" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Traces</text>
<text x="129" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">OTel</text>
<text x="129" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Jaeger</text>
<rect x="176" y="26" width="75" height="50" rx="5" fill="none" stroke="#22c982" stroke-width="1"/>
<text x="213" y="44" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Logs</text>
<text x="213" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">Loki</text>
<text x="213" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">ELK</text>
<rect x="260" y="26" width="72" height="50" rx="5" fill="none" stroke="#f5a623" stroke-width="1.2"/>
<text x="296" y="44" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono" font-weight="600">LLM</text>
<text x="296" y="56" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">LangFuse</text>
<text x="296" y="66" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">cost+quality</text>
</svg>`
},
configuration: {
title:"Configuration", color:"var(--c-conf)",
overview:{
purpose:"Foundational — every layer reads from it. Secrets, env vars, model parameters, feature flags, runtime toggles. Not the outermost layer; it's the first thing any layer bootstraps from.",
components:[
"Secret management — Vault, SOPS, AWS Secrets Manager, sealed-secrets",
"Config distribution — ConfigMaps, etcd, Consul, Spring Cloud Config",
"Token & API key rotation with automatic expiry and alerting",
"Model parameter registry — MLflow, W&B (hyperparams, versions, metadata)",
"Feature flags — LaunchDarkly, Unleash, Flagsmith (toggle features per env/user)",
"Environment promotion — dev → staging → prod with config drift detection"
]
},
stack:["Vault","etcd","Consul","ConfigMaps","MLflow","W&B","LaunchDarkly","Unleash","SOPS","dotenv","AWS SSM"],
code:`<span class="c"># config hierarchy: secrets → env → flags → runtime</span>
<span class="k">from</span> pydantic_settings <span class="k">import</span> BaseSettings
<span class="k">class</span> <span class="t">Config</span>(BaseSettings):
<span class="c"># from Vault (highest priority)</span>
anthropic_api_key: str
db_password: str
<span class="c"># from env / ConfigMap</span>
default_model: str = <span class="s">"claude-sonnet-4-20250514"</span>
fallback_models: list = [
<span class="s">"deepseek-chat"</span>, <span class="s">"ollama/llama3.1"</span>
]
embedding_model: str = <span class="s">"BAAI/bge-m3"</span>
max_tokens: int = <span class="f">4096</span>
<span class="c"># from feature flags (runtime toggleable)</span>
enable_streaming: bool = <span class="f">True</span>
enable_prompt_caching: bool = <span class="f">True</span>
rag_reranking_enabled: bool = <span class="f">False</span>
<span class="k">class</span> <span class="t">Config</span>:
env_prefix = <span class="s">"AI_"</span>`,
patterns:`<svg viewBox="0 0 340 75" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#6a7a9b" font-size="8" font-family="IBM Plex Mono" font-weight="600">Config Cascade (priority order)</text>
<rect x="8" y="24" width="75" height="40" rx="5" fill="none" stroke="#ef5350" stroke-width="1.2"/>
<text x="45" y="42" text-anchor="middle" fill="#ef5350" font-size="7" font-family="IBM Plex Mono">Vault</text>
<text x="45" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">secrets ①</text>
<line x1="83" y1="44" x2="93" y2="44" stroke="#2a3a5c" stroke-width="1"/>
<rect x="93" y="24" width="75" height="40" rx="5" fill="none" stroke="#f5a623" stroke-width="1"/>
<text x="130" y="42" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono">Env</text>
<text x="130" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">config ②</text>
<line x1="168" y1="44" x2="178" y2="44" stroke="#2a3a5c" stroke-width="1"/>
<rect x="178" y="24" width="75" height="40" rx="5" fill="none" stroke="#22c982" stroke-width="1"/>
<text x="215" y="42" text-anchor="middle" fill="#22c982" font-size="7" font-family="IBM Plex Mono">Flags</text>
<text x="215" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">runtime ③</text>
<line x1="253" y1="44" x2="263" y2="44" stroke="#2a3a5c" stroke-width="1"/>
<rect x="263" y="24" width="68" height="40" rx="5" fill="none" stroke="#4a90ff" stroke-width="1"/>
<text x="297" y="42" text-anchor="middle" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">Default</text>
<text x="297" y="53" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">code ④</text>
</svg>`
},
memory: {
title:"Memory & State", color:"var(--c-mem)",
overview:{
purpose:"Conversation history, agent working memory, episodic memory, session state, user preferences. Redis-as-cache is NOT the same as a memory system. Critical for multi-turn agents.",
components:[
"Conversation history — per-session message buffer, sliding window + summarization",
"Agent working memory — scratchpad for multi-step reasoning, intermediate results",
"Episodic memory — long-term retrieval of past interactions (mem0, Zep, custom)",
"Semantic memory — knowledge distilled from conversations into structured facts",
"Session state — user context, preferences, auth tokens, active tool state",
"Shared state — multi-agent shared blackboard for cooperative task solving",
"Persistence — conversation → DB, with TTL policies and GDPR-compliant deletion"
]
},
stack:["Redis","mem0","Zep","PostgreSQL","LangGraph checkpointing","Custom episodic stores"],
code:`<span class="c"># memory system: layered (working → episodic → semantic)</span>
<span class="k">from</span> mem0 <span class="k">import</span> Memory
memory = Memory.from_config({
<span class="s">"vector_store"</span>: {
<span class="s">"provider"</span>: <span class="s">"qdrant"</span>,
<span class="s">"config"</span>: {<span class="s">"url"</span>: <span class="s">"http://localhost:6333"</span>}
}
})
<span class="c"># add interaction to long-term memory</span>
memory.add(
<span class="s">"User asked about GDPR Art.17 deletion"</span>
<span class="s">" for SAP records in DE region"</span>,
user_id=<span class="s">"christof"</span>,
metadata={<span class="s">"topic"</span>: <span class="s">"gdpr"</span>, <span class="s">"system"</span>: <span class="s">"sap"</span>}
)
<span class="c"># retrieve relevant memories for context</span>
memories = memory.search(
<span class="s">"GDPR deletion status"</span>,
user_id=<span class="s">"christof"</span>,
limit=<span class="f">5</span>
)
<span class="c"># → inject into system prompt as context</span>`,
patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#00d4e6" font-size="8" font-family="IBM Plex Mono" font-weight="600">Memory Hierarchy</text>
<rect x="8" y="26" width="100" height="50" rx="5" fill="none" stroke="#ef5350" stroke-width="1"/>
<text x="58" y="42" text-anchor="middle" fill="#ef5350" font-size="7" font-family="IBM Plex Mono">Working</text>
<text x="58" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">current turn</text>
<text x="58" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">scratchpad</text>
<rect x="118" y="26" width="100" height="50" rx="5" fill="none" stroke="#00d4e6" stroke-width="1.2"/>
<text x="168" y="42" text-anchor="middle" fill="#00d4e6" font-size="7" font-family="IBM Plex Mono">Episodic</text>
<text x="168" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">past interactions</text>
<text x="168" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">mem0 / Zep</text>
<rect x="228" y="26" width="100" height="50" rx="5" fill="none" stroke="#f5a623" stroke-width="1"/>
<text x="278" y="42" text-anchor="middle" fill="#f5a623" font-size="7" font-family="IBM Plex Mono">Semantic</text>
<text x="278" y="54" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">distilled facts</text>
<text x="278" y="64" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">knowledge graph</text>
</svg>`
},
// ─── LIFECYCLE & DEV ──────────────────────────────────
lifecycle: {
title:"Continuous Improvement Loop", color:"var(--c-life)",
overview:{
purpose:"The closed loop that makes the system LEARN. User feedback → evaluation → fine-tuning → deployment → monitoring → repeat. Without this, your system is frozen at day-one quality.",
components:[
"User feedback — thumbs up/down, corrections, explicit ratings",
"Automated eval — RAGAS, DeepEval, custom eval harnesses on golden datasets",
"Drift detection — embedding drift, answer quality degradation over time",
"Fine-tuning trigger — when eval scores drop below threshold → retrain",
"Model versioning — MLflow model registry, A/B testing between versions",
"Canary deployment — gradual rollout of new model versions (5% → 25% → 100%)",
"Governance — approval gates for production model updates, rollback capability"
]
},
stack:["MLflow","DVC","RAGAS","DeepEval","W&B","ArgoCD","Canary deploys","A/B testing"],
code:`<span class="c"># feedback loop: eval → trigger retrain</span>
<span class="k">from</span> mlflow <span class="k">import</span> MlflowClient
client = MlflowClient()
<span class="c"># 1. evaluate current model on golden set</span>
scores = run_eval_suite(
model=<span class="s">"production"</span>,
dataset=<span class="s">"golden-v3"</span>,
metrics=[<span class="s">"relevancy"</span>,<span class="s">"faithfulness"</span>,<span class="s">"recall"</span>]
)
<span class="c"># 2. check if degraded</span>
<span class="k">if</span> scores[<span class="s">"relevancy"</span>] &lt; <span class="f">0.75</span>:
<span class="c"># 3. trigger fine-tune with recent feedback</span>
new_run = trigger_finetune(
base_model=<span class="s">"llama3.1-8b"</span>,
dataset=<span class="s">"feedback-2025-q2"</span>,
method=<span class="s">"qlora"</span>,
)
<span class="c"># 4. register + canary deploy</span>
client.transition_model_version_stage(
name=<span class="s">"rag-model"</span>,
version=new_run.version,
stage=<span class="s">"Staging"</span> <span class="c"># → canary 5%</span>
)`,
patterns:`<svg viewBox="0 0 340 90" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#84cc16" font-size="8" font-family="IBM Plex Mono" font-weight="600">Continuous Improvement Cycle</text>
<rect x="120" y="22" width="70" height="22" rx="4" fill="none" stroke="#84cc16" stroke-width="1.2"/>
<text x="155" y="37" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Deploy</text>
<path d="M190 33 Q230 33 230 50 Q230 67 190 67" fill="none" stroke="#9c7cfc" stroke-width="1"/>
<text x="240" y="53" fill="#9c7cfc" font-size="6" font-family="IBM Plex Mono">Monitor</text>
<rect x="120" y="56" width="70" height="22" rx="4" fill="none" stroke="#f5a623" stroke-width="1.2"/>
<text x="155" y="71" text-anchor="middle" fill="#c8d0e0" font-size="7" font-family="IBM Plex Mono">Eval</text>
<path d="M120 67 Q80 67 80 50 Q80 33 120 33" fill="none" stroke="#ef5350" stroke-width="1"/>
<text x="48" y="53" fill="#ef5350" font-size="6" font-family="IBM Plex Mono">Feedback</text>
<text x="155" y="51" text-anchor="middle" fill="#3a4a6b" font-size="6" font-family="IBM Plex Mono">fine-tune</text>
</svg>`
},
devpractices: {
title:"Dev Practices", color:"var(--c-dev)",
overview:{
purpose:"Development-time concerns — NOT a runtime layer. Testing, CI/CD, code review, documentation, LLM evals. These live in your pipeline, not in your production stack.",
components:[
"Unit & Integration testing — pytest, vitest, Jest",
"E2E testing — Playwright, Cypress, Selenium",
"LLM Evaluation — DeepEval, RAGAS, custom eval harnesses, golden datasets",
"Prompt regression — compare outputs across model versions on fixed inputs",
"CI/CD — GitHub Actions, GitLab CI, pre-commit hooks",
"Code review — PR templates, architecture decision records (ADRs)",
"Documentation — API docs (OpenAPI), runbooks, architecture diagrams (this!)",
"Linting & formatting — ruff, black, prettier, eslint"
]
},
stack:["pytest","vitest","Playwright","Cypress","DeepEval","RAGAS","GitHub Actions","pre-commit","ruff","black","ADRs"],
code:`<span class="c"># CI pipeline: test → eval → deploy</span>
<span class="c"># .github/workflows/ai-platform.yml</span>
<span class="k">name</span>: <span class="s">AI Platform CI</span>
<span class="k">on</span>: [push, pull_request]
<span class="k">jobs</span>:
<span class="k">test</span>:
<span class="k">steps</span>:
- <span class="s">pytest tests/ -x --tb=short</span>
- <span class="s">playwright test e2e/</span>
<span class="k">llm-eval</span>:
<span class="k">needs</span>: test
<span class="k">steps</span>:
- <span class="s">python eval/run_golden_set.py</span>
- <span class="k">if</span>: <span class="s">steps.eval.outputs.score &lt; 0.7</span>
<span class="k">run</span>: <span class="s">echo "::error::Eval below threshold"</span>
<span class="k">deploy</span>:
<span class="k">needs</span>: llm-eval
<span class="k">if</span>: <span class="s">github.ref == 'refs/heads/main'</span>
<span class="k">steps</span>:
- <span class="s">argocd app sync ai-platform --strategy canary</span>`,
patterns:`<svg viewBox="0 0 340 70" xmlns="http://www.w3.org/2000/svg">
<text x="8" y="14" fill="#ff8a50" font-size="8" font-family="IBM Plex Mono" font-weight="600">Dev-Time vs Runtime (category distinction)</text>
<rect x="8" y="26" width="155" height="35" rx="5" fill="none" stroke="#ff8a50" stroke-width="1.2" stroke-dasharray="4"/>
<text x="85" y="40" text-anchor="middle" fill="#ff8a50" font-size="7" font-family="IBM Plex Mono">DEV-TIME</text>
<text x="85" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">test · lint · eval · CI/CD · review</text>
<rect x="175" y="26" width="155" height="35" rx="5" fill="none" stroke="#4a90ff" stroke-width="1.2"/>
<text x="252" y="40" text-anchor="middle" fill="#4a90ff" font-size="7" font-family="IBM Plex Mono">RUNTIME</text>
<text x="252" y="52" text-anchor="middle" fill="#6a7a9b" font-size="6" font-family="IBM Plex Mono">security · observability · config · memory</text>
</svg>`
}
};
// ═══════════════════════════════════════════════════════
// RENDERING
// ═══════════════════════════════════════════════════════
// Selection state shared by the renderer, the click handlers and the keyboard handler:
// curKey is the key of the entry currently shown (null until something is selected),
// curTab is the name of the active detail tab.
let curKey=null;
let curTab='overview';
// DOM handles looked up once: the detail panel body, the tab buttons (.dt),
// and every clickable diagram element.
const detBody=document.querySelector('#detBody');
const tabs=document.querySelectorAll('.dt');
const allEl=document.querySelectorAll('.lyr,.xbar,.lifecycle,.devprac');
/**
 * Render one entry of D into the detail panel.
 *
 * @param {string} key  Property name in D; unknown keys are ignored.
 * @param {string} [tab] Tab to show ('overview' | 'stack' | 'code' | 'patterns').
 *                       Falsy values fall back to 'overview'; any other unknown
 *                       name renders only the title and color bar.
 *
 * Side effects: updates curKey/curTab, toggles the 'act' class on the tab
 * elements, and replaces detBody's innerHTML. The entry strings are inserted
 * as raw HTML (they contain markup such as <span> and <svg>).
 */
function render(key,tab){
  const entry=D[key];
  if(!entry)return;
  curKey=key;
  curTab=tab||'overview';
  // Mark the tab whose data-tab matches the active tab.
  tabs.forEach(btn=>btn.classList.toggle('act',btn.dataset.tab===curTab));
  const heading=text=>`<div class="d-h">${text}</div>`;
  const parts=[
    `<div class="d-title">${entry.title}</div>`,
    `<div class="d-bar" style="background:${entry.color}"></div>`
  ];
  switch(curTab){
    case 'overview':{
      parts.push(heading('Purpose'),`<p class="d-p">${entry.overview.purpose}</p>`);
      const items=entry.overview.components.map(c=>`<li>${c}</li>`).join('');
      parts.push(heading('Components'),`<ul class="d-ul">${items}</ul>`);
      break;
    }
    case 'stack':{
      const badges=entry.stack.map(t=>`<span class="d-badge">${t}</span>`).join('');
      parts.push(heading('Technology Stack'),`<div class="d-badges">${badges}</div>`);
      break;
    }
    case 'code':
      parts.push(heading('Example'),`<div class="d-code">${entry.code}</div>`);
      break;
    case 'patterns':
      parts.push(heading('Architecture Pattern'),`<div class="d-pattern">${entry.patterns}</div>`);
      break;
  }
  detBody.innerHTML=parts.join('');
}
// Clicking a diagram element selects it: the 'sel' class moves to the clicked
// element and its entry is rendered on whichever tab is currently active.
// Elements without a data-key, or with a key that has no entry in D, do nothing.
for(const el of allEl){
  el.addEventListener('click',evt=>{
    evt.stopPropagation();
    const {key}=el.dataset;
    if(!key||!D[key])return;
    for(const other of allEl)other.classList.remove('sel');
    el.classList.add('sel');
    render(key,curTab);
  });
}
// Clicking a tab re-renders the current entry on that tab (no-op until an entry is selected).
for(const tab of tabs){
  tab.addEventListener('click',()=>{
    if(curKey)render(curKey,tab.dataset.tab);
  });
}
// ═══ KEYBOARD ═══
// Keyboard shortcuts:
//   ArrowDown / ArrowUp  step through the entries in `nav` order (clamped at both ends)
//   Enter                cycle the detail tab of the current entry
//   T                    play the traceReq animation unless a trace is already running
// `tracing`, `traceReq` and `runTrace` are declared further down in this script;
// they are only touched when a key event fires, i.e. after the script has finished loading.
const nav=['security','observability','configuration','memory','presentation','orchestration','llm','data','infrastructure','lifecycle','devpractices'];
let ni=-1;
// Value of curKey when the last arrow key was handled. If curKey differs on the
// next press, the selection was changed by something else (mouse click, trace
// playback), and the walk must resume from there instead of from the stale ni.
let navSeenKey=null;
document.addEventListener('keydown',e=>{
  // Leave browser/OS shortcuts (Ctrl+T, Cmd+T, Alt+Arrow, …) alone.
  if(e.ctrlKey||e.metaKey||e.altKey)return;
  if(e.key==='ArrowDown'||e.key==='ArrowUp'){
    e.preventDefault();
    if(curKey!==navSeenKey){
      const shown=nav.indexOf(curKey);
      if(shown!==-1)ni=shown;
    }
    ni=e.key==='ArrowDown'?Math.min(ni+1,nav.length-1):Math.max(ni-1,0);
    const el=document.querySelector(`[data-key="${nav[ni]}"]`);
    // Selecting goes through the element's own click handler so the 'sel'
    // highlight and the detail panel stay in step with mouse interaction.
    if(el){el.click();el.scrollIntoView({behavior:'smooth',block:'nearest'})}
    navSeenKey=curKey;
  }
  if(e.key==='Enter'&&curKey){
    const ts=['overview','stack','code','patterns'];
    render(curKey,ts[(ts.indexOf(curTab)+1)%ts.length]);
  }
  if((e.key==='t'||e.key==='T')&&!tracing)runTrace(traceReq);
});
// ═══ TRACE ═══
// True while runTrace is playing; used to reject overlapping trace runs.
let tracing=false;
// Trace script for a chat request. Each row is [key, lbl, desc, time]:
// key is the data-key of the element runTrace highlights, the other three are
// the texts shown in the trace bar. This is the trace bound to the T key.
const traceReq=[
  ['presentation','① Presentation','User sends chat message via WebSocket','0ms'],
  ['security','② Security','Rate limit check → auth → input sanitization → PII redaction','15ms'],
  ['configuration','③ Config','Load model routing config, feature flags','18ms'],
  ['orchestration','④ Orchestration','Agent plans: Think → retrieve → generate','32ms'],
  ['memory','⑤ Memory','Load conversation history + relevant episodic memories','45ms'],
  ['data','⑥ Data','Hybrid pgvector search: 0.7×vector + 0.3×BM25, top-10','78ms'],
  ['llm','⑦ LLM','Claude generates with context + prompt cache hit (streaming)','320ms'],
  ['security','⑧ Security','Output validation: PII check, guardrails, toxicity','328ms'],
  ['observability','⑨ Observability','Trace logged → LangFuse (tokens, latency, cost, quality)','332ms'],
  ['presentation','⑩ Presentation','SSE stream delivers tokens to user','340ms']
].map(([key,lbl,desc,time])=>({key,lbl,desc,time}));
// Trace script for a RAG query. Each row is [key, lbl, desc, time]:
// key is the data-key of the element to highlight, the rest are the texts
// shown in the trace bar when the steps are played by runTrace.
const traceRag=[
  ['presentation','① Query','User: "What is GDPR Art.17?"','0ms'],
  ['orchestration','② Plan','RAG strategy selected: hybrid search + rerank','12ms'],
  ['llm','③ Embed','Query → BGE-M3 embedding (1024d)','25ms'],
  ['data','④ Retrieve','pgvector hybrid: 10 candidates from 50k chunks','48ms'],
  ['orchestration','⑤ Rerank','Cross-encoder reranks → top-5 relevant chunks','110ms'],
  ['memory','⑥ Context','Inject conversation history + past GDPR queries','118ms'],
  ['llm','⑦ Generate','Claude generates answer with 5 chunks + memory','420ms'],
  ['observability','⑧ Eval','RAGAS: relevancy=0.92 faithfulness=0.88 recall=0.85','435ms']
].map(([key,lbl,desc,time])=>({key,lbl,desc,time}));
// Trace script for a multi-step agent task. Each row is [key, lbl, desc, time]:
// key is the data-key of the element to highlight, the rest are the texts
// shown in the trace bar when the steps are played by runTrace.
const traceAgent=[
  ['presentation','① Task','User: "Check pizza products GDPR deletion status"','0ms'],
  ['orchestration','② Plan','Agent creates 3-step plan (search → check → report)','180ms'],
  ['orchestration','③ Act','MCP tool call: search_catalog("frozen pizza")','220ms'],
  ['data','④ Fetch','Product catalog returns 12 pizza SKUs','340ms'],
  ['orchestration','⑤ Observe','Agent parses results, iterates GDPR checks','500ms'],
  ['orchestration','⑥ Act','MCP: check_gdpr_status(system="SAP", batch=12)','540ms'],
  // Spelling corrected from "protocoll" in the user-facing description.
  ['data','⑦ Fetch','Deletion protocol returns status for all 12 records','680ms'],
  ['llm','⑧ Synthesize','Agent generates formatted summary report','1100ms'],
  ['observability','⑨ Trace','8 steps, 3 tool calls, 4200 tokens logged','1120ms'],
  ['presentation','⑩ Respond','Table + status summary streamed to user','1200ms']
].map(([key,lbl,desc,time])=>({key,lbl,desc,time}));
// Trace script for the feedback / fine-tuning loop. Each row is [key, lbl, desc, time]:
// key is the data-key of the element to highlight, the rest are the texts
// shown in the trace bar when the steps are played by runTrace.
const traceFeedback=[
  ['observability','① Monitor','Quality score drift detected: relevancy dropped 0.82→0.71','0ms'],
  ['data','② Collect','Aggregate 2000 user feedback samples from last 30 days','5s'],
  ['llm','③ Fine-tune','QLoRA training on feedback dataset (RTX 5090, 2 epochs)','45min'],
  ['infrastructure','④ Build','Quantize to GGUF Q4_K_M, build container, push registry','52min'],
  ['infrastructure','⑤ Deploy','Canary rollout: 5% traffic to new model version','53min'],
  ['observability','⑥ Validate','A/B eval: new model relevancy=0.86 vs baseline=0.71','2h'],
  ['configuration','⑦ Promote','Feature flag: route 100% to new model, archive old','2h']
].map(([key,lbl,desc,time])=>({key,lbl,desc,time}));
/**
 * Play a trace script in the trace bar, one step every 700 ms.
 *
 * @param {Array<{key:string,lbl:string,desc:string,time:string}>} steps
 *        Steps to play in order.
 *
 * For each step the progress fill is advanced, the label/description/time
 * texts are updated, and the first element whose data-key matches step.key
 * (if any) gets a 500 ms 'flash' class and a synthetic click. After the last
 * step the bar stays visible for 1800 ms, then hides and clears `tracing`.
 * Calls made while a trace is already running return immediately.
 */
function runTrace(steps){
  if(tracing)return;
  tracing=true;
  const byId=id=>document.getElementById(id);
  const bar=byId('traceBar'),fill=byId('tFill');
  const lbl=byId('tLbl'),desc=byId('tDesc'),tm=byId('tTime');
  bar.classList.add('show');
  fill.style.width='0%';
  const playStep=idx=>{
    if(idx>=steps.length){
      // Finished: leave the bar up briefly, then release the lock.
      setTimeout(()=>{
        bar.classList.remove('show');
        tracing=false;
      },1800);
      return;
    }
    const step=steps[idx];
    fill.style.width=`${((idx+1)/steps.length*100).toFixed(0)}%`;
    lbl.textContent=step.lbl;
    desc.textContent=step.desc;
    tm.textContent=step.time;
    const target=document.querySelector(`[data-key="${step.key}"]`);
    if(target){
      target.classList.add('flash');
      target.click();
      setTimeout(()=>target.classList.remove('flash'),500);
    }
    setTimeout(()=>playStep(idx+1),700);
  };
  // The first step runs synchronously, the rest are timer-driven.
  playStep(0);
}
</script>
</body>
</html>