Spaces:

joeddav
/

illustrated-cluster

Sleeping

App Files Files Community

joeddav committed on Mar 9

Commit

1f77aa7

1 Parent(s): 83ce100

Publish WIP HF Space snapshot

Browse files

Files changed (35) hide show

.dockerignore +4 -0
.gitignore +27 -0
Dockerfile +22 -0
README.md +53 -6
compose.yaml +12 -0
eslint.config.js +23 -0
index.html +13 -0
package-lock.json +0 -0
package.json +40 -0
playwright.config.ts +36 -0
src/App.css +641 -0
src/App.tsx +205 -0
src/components/ClusterMap.tsx +2086 -0
src/components/ControlsPanel.tsx +688 -0
src/components/pixi/PixiSurface.tsx +50 -0
src/hooks/useElementSize.ts +48 -0
src/index.css +56 -0
src/lib/linkedFocus.ts +35 -0
src/lib/topologyLod.ts +216 -0
src/lib/topologyScene.ts +980 -0
src/lib/trainingClusterModel.ts +1882 -0
src/lib/viewOptions.ts +50 -0
src/lib/workbench.ts +395 -0
src/lib/workbenchPresenter.ts +220 -0
src/main.tsx +15 -0
src/types/global.d.ts +49 -0
tests/topology.spec.ts +234 -0
tests/topologyLod.test.ts +49 -0
tests/topologySceneModel.test.ts +90 -0
tests/trainingClusterModel.test.ts +269 -0
tsconfig.app.json +28 -0
tsconfig.json +7 -0
tsconfig.node.json +26 -0
vite.config.ts +14 -0
vitest.config.ts +7 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,4 @@

+node_modules
+dist
+.git
+npm-debug.log

.gitignore ADDED Viewed

	@@ -0,0 +1,27 @@

+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+node_modules
+dist
+dist-ssr
+*.local
+test-results
+playwright-report
+tests/topology.spec.ts-snapshots
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# Multi-stage image: `base` installs dependencies, `dev` runs the dev server,
+# `build` produces the static bundle, and `production` serves that bundle.
+FROM node:22-alpine AS base
+WORKDIR /app
+# Only the package manifests are copied before `npm ci`, so the install layer
+# can be reused until the manifests change.
+COPY package*.json ./
+RUN npm ci
+# Development stage (the target selected by compose.yaml).
+FROM base AS dev
+COPY . .
+EXPOSE 7860
+# NOTE(review): this runs the `dev` script from package.json; whether it listens
+# on 7860 depends on vite.config.ts, which is not visible here. Confirm.
+CMD ["npm", "run", "dev"]
+# Build stage: runs the `build` script to produce /app/dist.
+FROM base AS build
+COPY . .
+RUN npm run build
+# Production stage: starts from a clean image, so only the static server and
+# the built assets are present.
+FROM node:22-alpine AS production
+# `serve` is pinned to an exact version and installed before dropping privileges.
+RUN npm install -g serve@14.2.4
+USER node
+ENV HOME=/home/node
+WORKDIR /home/node/app
+# Copy the bundle out of the build stage, owned by the unprivileged user.
+COPY --from=build --chown=node:node /app/dist ./dist
+EXPOSE 7860
+# `-s` serves in single-page-application mode; `-l 7860` sets the listen port.
+CMD ["serve", "-s", "dist", "-l", "7860"]

README.md CHANGED Viewed

@@ -1,12 +1,59 @@
 ---
-title: Illustrated Cluster
-emoji: ⚡
-colorFrom: purple
-colorTo: indigo
 sdk: docker
 pinned: false
 license: mit
-short_description: '[WIP] Interactive visualization of an LLM training cluster'
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: "[WIP] Illustrated Training Cluster"
+colorFrom: yellow
+colorTo: green
 sdk: docker
+app_port: 7860
 pinned: false
 license: mit
+short_description: "[WIP] Interactive visualization of an LLM training cluster"
 ---
+# [WIP] Illustrated Training Cluster
+Interactive workbench for exploring how large-model training layouts map onto GPU clusters.
+Current WIP scope:
+- compute-backed memory, communication, and throughput estimates
+- linked cluster and transformer visualizations
+- editable model, cluster, training, and parallelism controls
+- built-in OLMo 3 32B and Trinity Large 400B starting points
+Temporary note:
+- the Llama 3.1 405B example is hidden from the UI while its training recipe is being reworked
+## Stack
+- React 19 + TypeScript
+- Vite
+- PixiJS + `@pixi/react`
+- Docker for local runs and Hugging Face Spaces deployment
+## Local development
+```bash
+docker compose up --build
+```
+Then open [http://localhost:7860](http://localhost:7860).
+## Checks
+```bash
+npm run test:unit
+npm run lint
+npm run build
+npm run test:e2e
+```
+## Debugging and snapshots
+- `?debug=1` enables the in-app debug overlay
+- `?snapshot=1` freezes animation for deterministic screenshots
+- `?scenario=default|olmo-pretraining|olmo-long-context|llama-pretraining|llama-long-context|trinity-pretraining|trinity-long-context|infeasible-memory`
+## Hugging Face Spaces
+This repository is configured as a Docker Space. Hugging Face builds the root `Dockerfile` and serves the app on port `7860`.

compose.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+# Local development stack: builds the Dockerfile's `dev` stage and publishes port 7860.
+services:
+  app:
+    build:
+      context: .
+      # Use the development stage rather than the production image.
+      target: dev
+    ports:
+      - '7860:7860'
+    environment:
+      # Presumably forces polling-based file watching so edits on the bind mount
+      # are noticed. NOTE(review): confirm the dev server's watcher honours this variable.
+      CHOKIDAR_USEPOLLING: 'true'
+    volumes:
+      # Bind-mount the working tree over /app for live editing.
+      - .:/app
+      # Anonymous volume so the bind mount above does not hide the image's node_modules.
+      - /app/node_modules

eslint.config.js ADDED Viewed

	@@ -0,0 +1,23 @@

+import js from '@eslint/js'
+import globals from 'globals'
+import reactHooks from 'eslint-plugin-react-hooks'
+import reactRefresh from 'eslint-plugin-react-refresh'
+import tseslint from 'typescript-eslint'
+import { defineConfig, globalIgnores } from 'eslint/config'
+// Flat ESLint configuration: one ignore entry plus one rule set for TypeScript sources.
+export default defineConfig([
+  // The build output directory is excluded from linting.
+  globalIgnores(['dist']),
+  {
+    // Everything below applies only to .ts and .tsx files.
+    files: ['**/*.{ts,tsx}'],
+    extends: [
+      js.configs.recommended,
+      tseslint.configs.recommended,
+      reactHooks.configs.flat.recommended,
+      reactRefresh.configs.vite,
+    ],
+    languageOptions: {
+      ecmaVersion: 2020,
+      // Declare the browser global set as known globals.
+      globals: globals.browser,
+    },
+  },
+])

index.html ADDED Viewed

	@@ -0,0 +1,13 @@

+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <!-- No icon asset ships with the app, so an empty data URI is used instead of a path to a missing file. -->
+    <link rel="icon" href="data:," />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>[WIP] Illustrated Training Cluster</title>
+  </head>
+  <body>
+    <!-- Mount point for the React application started by src/main.tsx. -->
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>

package-lock.json ADDED Viewed

The diff for this file is too large to render. See raw diff

package.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "name": "cluster-topology-viz",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "lint": "eslint .",
+    "preview": "vite preview",
+    "check": "npm run lint && npm run build",
+    "test:unit": "vitest run",
+    "test:e2e": "npm run build && playwright test",
+    "test:e2e:update": "npm run build && playwright test --update-snapshots"
+  },
+  "dependencies": {
+    "@fontsource/ibm-plex-mono": "^5.2.7",
+    "@fontsource/space-grotesk": "^5.2.10",
+    "@pixi/react": "^8.0.5",
+    "pixi.js": "^8.16.0",
+    "react": "^19.2.0",
+    "react-dom": "^19.2.0"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.39.1",
+    "@playwright/test": "^1.58.2",
+    "@types/node": "^24.10.1",
+    "@types/react": "^19.2.7",
+    "@types/react-dom": "^19.2.3",
+    "@vitejs/plugin-react": "^5.1.1",
+    "eslint": "^9.39.1",
+    "eslint-plugin-react-hooks": "^7.0.1",
+    "eslint-plugin-react-refresh": "^0.4.24",
+    "globals": "^16.5.0",
+    "typescript": "~5.9.3",
+    "typescript-eslint": "^8.48.0",
+    "vite": "^7.3.1",
+    "vitest": "^4.0.18"
+  }
+}

playwright.config.ts ADDED Viewed

	@@ -0,0 +1,36 @@

+import { defineConfig, devices } from '@playwright/test'
+// Single source of truth for the browser viewport so the shared `use` block and
+// the chromium project cannot drift apart.
+const viewport = {
+  width: 1600,
+  height: 1100,
+}
+// End-to-end test configuration: runs the *.spec.ts files in ./tests serially
+// against the production preview server on port 4173.
+export default defineConfig({
+  testDir: './tests',
+  testMatch: /.*\.spec\.ts/,
+  fullyParallel: false,
+  retries: 0,
+  reporter: 'list',
+  workers: 1,
+  use: {
+    baseURL: 'http://127.0.0.1:4173',
+    // Retries are disabled above, so a retry-triggered trace would never be
+    // recorded; keep the trace of any failing test instead.
+    trace: 'retain-on-failure',
+    viewport,
+  },
+  projects: [
+    {
+      name: 'chromium',
+      use: {
+        ...devices['Desktop Chrome'],
+        // Re-applied after the device preset so the preset's own viewport does not win.
+        viewport,
+      },
+    },
+  ],
+  webServer: {
+    // Serves the already-built bundle; the `test:e2e` script runs the build first.
+    command: 'npm run preview -- --host 127.0.0.1 --port 4173',
+    port: 4173,
+    reuseExistingServer: true,
+    timeout: 120000,
+  },
+})

src/App.css ADDED Viewed

	@@ -0,0 +1,641 @@

+.workbench-shell {
+  max-width: 1680px;
+  margin: 0 auto;
+  padding: 18px;
+}
+.mini-label {
+  margin: 0 0 6px;
+  color: var(--accent-cool);
+  font-family: var(--font-mono);
+  font-size: 0.72rem;
+  letter-spacing: 0.13em;
+  text-transform: uppercase;
+}
+.app-topbar {
+  display: grid;
+  gap: 14px;
+  margin-bottom: 14px;
+}
+.title-block {
+  display: grid;
+  gap: 4px;
+}
+.title-block h1 {
+  margin: 0;
+  color: var(--ink-strong);
+  font-size: clamp(1.7rem, 2vw, 2.2rem);
+  line-height: 1;
+  letter-spacing: -0.04em;
+}
+.title-copy {
+  margin: 0;
+  max-width: 88ch;
+  color: var(--ink-soft);
+  font-size: 0.98rem;
+}
+.summary-strip {
+  display: grid;
+  grid-template-columns: minmax(260px, 1.8fr) repeat(4, minmax(0, 1fr));
+  gap: 10px;
+}
+.summary-card,
+.controls-band,
+.map-panel,
+.side-card {
+  border: 1px solid var(--panel-stroke);
+  background: rgba(253, 252, 248, 0.92);
+  box-shadow: 0 12px 28px rgba(19, 42, 51, 0.06);
+}
+.summary-card {
+  min-height: 88px;
+  padding: 12px 14px;
+  border-radius: 16px;
+}
+.summary-card span,
+.fact-row span,
+.inspector-grid dt {
+  display: block;
+  color: var(--ink-muted);
+  font-size: 0.76rem;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+}
+.summary-card strong,
+.fact-row strong,
+.inspector-grid dd {
+  display: block;
+  margin-top: 6px;
+  color: var(--ink-strong);
+  font-size: 1.15rem;
+  line-height: 1.05;
+}
+.summary-card p {
+  margin: 8px 0 0;
+  color: var(--ink-soft);
+  font-size: 0.9rem;
+}
+.summary-card-wide strong {
+  font-size: 1.25rem;
+}
+.controls-band {
+  padding: 12px 14px 14px;
+  border-radius: 16px;
+}
+.controls-head {
+  display: flex;
+  justify-content: space-between;
+  gap: 12px;
+  align-items: flex-end;
+  margin-bottom: 12px;
+}
+.controls-head h2,
+.topology-header h2,
+.side-header h3 {
+  margin: 0;
+  color: var(--ink-strong);
+  font-size: 1.1rem;
+}
+.controls-meta {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  align-items: center;
+}
+.controls-meta span,
+.reset-chip,
+.scene-button {
+  padding: 7px 10px;
+  border-radius: 999px;
+  border: 1px solid rgba(19, 58, 80, 0.09);
+  background: rgba(246, 244, 238, 0.92);
+  color: var(--ink-soft);
+  font-family: var(--font-mono);
+  font-size: 0.78rem;
+}
+.reset-chip,
+.scene-button {
+  color: var(--accent-warm);
+}
+.controls-grid {
+  display: grid;
+  grid-template-columns: repeat(4, minmax(0, 1fr));
+  gap: 10px;
+}
+.controls-stack {
+  display: grid;
+  gap: 10px;
+}
+.controls-grid-parallelism {
+  grid-template-columns: repeat(5, minmax(0, 1fr));
+}
+.control-card {
+  border: 1px solid rgba(19, 58, 80, 0.08);
+  border-radius: 14px;
+  padding: 11px 12px;
+  background: rgba(250, 248, 242, 0.96);
+}
+.field-grid {
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 10px;
+}
+.field-grid-wide {
+  grid-template-columns: repeat(3, minmax(0, 1fr));
+}
+.control-card-header {
+  display: flex;
+  justify-content: space-between;
+  gap: 10px;
+  align-items: center;
+  margin-bottom: 10px;
+}
+.control-card-header h3 {
+  margin: 0;
+  color: var(--ink-strong);
+  font-size: 1rem;
+}
+.control-card-header p {
+  margin: 2px 0 0;
+  color: var(--ink-soft);
+  font-size: 0.84rem;
+}
+.control-badge {
+  padding: 5px 8px;
+  border-radius: 999px;
+  background: rgba(17, 122, 112, 0.1);
+  color: var(--accent-cool);
+  font-family: var(--font-mono);
+  font-size: 0.76rem;
+}
+.control-field {
+  display: grid;
+  gap: 6px;
+}
+.control-field span,
+.control-toggle span {
+  color: var(--ink-muted);
+  font-size: 0.76rem;
+  letter-spacing: 0.06em;
+  text-transform: uppercase;
+}
+.control-field input,
+.control-field select {
+  width: 100%;
+  padding: 8px 10px;
+  border: 1px solid rgba(19, 58, 80, 0.12);
+  border-radius: 10px;
+  background: #fffdf8;
+  color: var(--ink-strong);
+}
+.control-field-toggle {
+  align-self: end;
+}
+.control-toggle {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  min-height: 40px;
+  padding: 8px 10px;
+  border: 1px solid rgba(19, 58, 80, 0.12);
+  border-radius: 10px;
+  background: #fffdf8;
+}
+.control-toggle input {
+  accent-color: var(--accent-cool);
+}
+.option-strip {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+}
+.option-chip {
+  min-width: 38px;
+  padding: 7px 9px;
+  border: 1px solid rgba(19, 58, 80, 0.12);
+  border-radius: 10px;
+  background: #fffdf8;
+  color: var(--ink-soft);
+  font-weight: 500;
+  transition:
+    background-color 150ms ease,
+    border-color 150ms ease,
+    transform 150ms ease;
+}
+.option-chip:hover {
+  transform: translateY(-1px);
+}
+.option-chip.active {
+  border-color: rgba(17, 122, 112, 0.26);
+  background: rgba(225, 246, 241, 0.96);
+  color: var(--accent-cool);
+}
+.analysis-stack {
+  display: grid;
+  gap: 14px;
+  margin-top: 14px;
+  align-items: start;
+}
+.status-banner {
+  display: flex;
+  gap: 10px;
+  align-items: center;
+  padding: 10px 12px;
+  border-radius: 14px;
+  border: 1px solid rgba(214, 98, 37, 0.16);
+  background: rgba(255, 245, 236, 0.96);
+  color: var(--ink-soft);
+}
+.status-banner strong {
+  color: var(--accent-warm);
+}
+.map-panel {
+  padding: 12px;
+  border-radius: 16px;
+}
+.topology-header {
+  display: flex;
+  justify-content: space-between;
+  gap: 12px;
+  align-items: flex-end;
+  margin-bottom: 10px;
+}
+.topology-header-actions {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 10px;
+  align-items: center;
+  justify-content: flex-end;
+}
+.topology-scene-shell {
+  display: grid;
+  gap: 10px;
+}
+.scene-toolbar {
+  display: flex;
+  justify-content: flex-end;
+  gap: 10px;
+  align-items: center;
+}
+.scene-toolbar-actions {
+  display: flex;
+  gap: 8px;
+  flex-wrap: wrap;
+}
+.pixi-surface-wrap {
+  position: relative;
+  width: 100%;
+  overflow: hidden;
+  border-radius: 18px;
+  background:
+    radial-gradient(circle at 10% 10%, rgba(24, 155, 141, 0.14), transparent 22%),
+    radial-gradient(circle at 100% 0%, rgba(255, 175, 111, 0.16), transparent 24%),
+    linear-gradient(180deg, #0f202d 0%, #08141d 100%);
+}
+.topology-surface-wrap {
+  min-height: 760px;
+  height: min(76vh, 980px);
+  user-select: none;
+  touch-action: none;
+  overscroll-behavior: contain;
+}
+.topology-interaction-layer {
+  position: absolute;
+  inset: 0;
+  z-index: 1;
+  background: rgba(0, 0, 0, 0.001);
+  cursor: grab;
+  touch-action: none;
+  overscroll-behavior: contain;
+}
+.topology-interaction-layer.is-dragging {
+  cursor: grabbing;
+}
+.pixi-surface,
+.pixi-canvas,
+.pixi-surface canvas {
+  display: block;
+  width: 100%;
+  height: 100%;
+}
+.scene-inspector,
+.scene-debug-panel {
+  position: absolute;
+  z-index: 2;
+  max-width: min(320px, calc(100% - 32px));
+  border: 1px solid rgba(255, 255, 255, 0.08);
+  border-radius: 16px;
+  backdrop-filter: blur(14px);
+  pointer-events: auto;
+}
+.scene-inspector {
+  left: 16px;
+  bottom: 16px;
+  padding: 12px 14px;
+  background: rgba(7, 19, 29, 0.78);
+  color: rgba(229, 241, 246, 0.92);
+}
+.scene-inspector .mini-label {
+  color: rgba(135, 244, 226, 0.82);
+}
+.scene-inspector h3 {
+  margin: 0;
+  font-size: 1rem;
+}
+.inspector-subheading {
+  margin: 6px 0 0;
+  color: rgba(179, 201, 211, 0.82);
+  font-size: 0.88rem;
+}
+.inspector-link-note {
+  margin: 10px 0 0;
+  color: rgba(255, 223, 161, 0.9);
+  font-size: 0.82rem;
+}
+.inspector-grid {
+  display: grid;
+  grid-template-columns: repeat(2, minmax(0, 1fr));
+  gap: 10px 14px;
+  margin: 12px 0 0;
+}
+.inspector-grid div {
+  margin: 0;
+}
+.inspector-grid dt {
+  color: rgba(160, 188, 200, 0.78);
+  font-size: 0.68rem;
+}
+.inspector-grid dd {
+  margin: 4px 0 0;
+  color: rgba(243, 250, 252, 0.96);
+  font-size: 0.95rem;
+}
+.scene-debug-panel {
+  top: 16px;
+  right: 16px;
+  padding: 12px 14px;
+  background: rgba(10, 20, 31, 0.82);
+  color: rgba(225, 238, 244, 0.92);
+}
+.scene-debug-panel .mini-label {
+  color: rgba(255, 221, 156, 0.82);
+}
+.debug-toggle-grid {
+  display: grid;
+  gap: 8px;
+}
+.debug-toggle-grid label {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  color: rgba(228, 240, 245, 0.92);
+  font-size: 0.86rem;
+}
+.debug-toggle-grid input {
+  accent-color: var(--accent-warm);
+}
+.debug-stats {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  margin-top: 12px;
+}
+.debug-stats span {
+  padding: 5px 8px;
+  border-radius: 999px;
+  background: rgba(255, 255, 255, 0.06);
+  color: rgba(235, 245, 248, 0.9);
+  font-family: var(--font-mono);
+  font-size: 0.74rem;
+}
+.side-column {
+  display: grid;
+  gap: 14px;
+}
+.side-card {
+  padding: 12px;
+  border-radius: 16px;
+}
+.side-header {
+  margin-bottom: 12px;
+}
+.facts-grid {
+  display: grid;
+  gap: 10px;
+}
+.fact-row {
+  padding-bottom: 10px;
+  border-bottom: 1px solid rgba(19, 58, 80, 0.08);
+}
+.fact-row:last-child {
+  padding-bottom: 0;
+  border-bottom: 0;
+}
+.warning-list {
+  display: grid;
+  gap: 8px;
+  margin-top: 12px;
+}
+.warning-pill {
+  border-left: 3px solid rgba(214, 98, 37, 0.74);
+  border-radius: 10px;
+  padding: 9px 10px;
+  background: rgba(255, 244, 232, 0.92);
+  color: var(--ink-soft);
+  font-size: 0.88rem;
+}
+.fullscreen-overlay {
+  position: fixed;
+  inset: 0;
+  z-index: 40;
+  display: grid;
+  place-items: center;
+  padding: 20px;
+  background: rgba(4, 12, 20, 0.72);
+  backdrop-filter: blur(10px);
+}
+.fullscreen-shell {
+  display: grid;
+  gap: 12px;
+  width: min(1600px, 100%);
+  max-height: calc(100vh - 40px);
+  padding: 14px;
+  border: 1px solid rgba(255, 255, 255, 0.08);
+  border-radius: 22px;
+  background: rgba(252, 250, 245, 0.98);
+  box-shadow: 0 24px 80px rgba(4, 12, 20, 0.38);
+}
+.fullscreen-toolbar {
+  display: flex;
+  justify-content: space-between;
+  gap: 12px;
+  align-items: flex-end;
+}
+.fullscreen-toolbar h2 {
+  margin: 0;
+  color: var(--ink-strong);
+  font-size: 1.2rem;
+}
+.fullscreen-content {
+  min-height: 0;
+  overflow: auto;
+}
+.fullscreen-content .map-panel {
+  min-height: calc(100vh - 168px);
+}
+.fullscreen-content .topology-surface-wrap {
+  height: calc(100vh - 290px);
+  min-height: 680px;
+}
+@media (max-width: 1400px) {
+  .summary-strip {
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+  }
+  .fullscreen-shell {
+    width: 100%;
+  }
+}
+@media (max-width: 1040px) {
+  .controls-grid,
+  .controls-grid-parallelism,
+  .field-grid,
+  .field-grid-wide {
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+  }
+  .controls-head,
+  .topology-header,
+  .scene-toolbar,
+  .fullscreen-toolbar {
+    flex-direction: column;
+    align-items: flex-start;
+  }
+}
+/* Narrow screens: tighter padding, single-column grids, shorter canvas, and
+   overlay panels that flow in the document instead of floating over the scene. */
+@media (max-width: 760px) {
+  .workbench-shell {
+    padding: 12px;
+  }
+  .summary-strip,
+  .controls-grid,
+  .controls-grid-parallelism,
+  .field-grid,
+  .field-grid-wide,
+  .inspector-grid {
+    grid-template-columns: 1fr;
+  }
+  .topology-surface-wrap {
+    min-height: 560px;
+    height: 64vh;
+  }
+  .fullscreen-overlay {
+    padding: 10px;
+  }
+  .fullscreen-shell {
+    max-height: calc(100vh - 20px);
+    padding: 10px;
+  }
+  .fullscreen-content .topology-surface-wrap {
+    min-height: 420px;
+    height: 62vh;
+  }
+  .scene-inspector,
+  .scene-debug-panel {
+    position: static;
+    max-width: none;
+    margin: 10px;
+  }
+}

src/App.tsx ADDED Viewed

	@@ -0,0 +1,205 @@

+import { useEffect, useMemo, useState } from 'react'
+import './App.css'
+import { ClusterMap } from './components/ClusterMap'
+import { ControlsPanel } from './components/ControlsPanel'
+import { analyzeCluster } from './lib/trainingClusterModel'
+import { getScenarioConfig, getViewOptions } from './lib/viewOptions'
+import { buildWorkbenchViewModel } from './lib/workbenchPresenter'
+import { type WorkbenchConfig } from './lib/workbench'
+/**
+ * Root component of the workbench.
+ *
+ * Holds the editable workbench configuration, derives the cluster analysis and
+ * the presentation view model from it, and renders the summary strip, the
+ * controls panel, the cluster map and the run breakdown. The cluster map can
+ * be moved into a full-screen modal overlay.
+ */
+function App() {
+  const viewOptions = getViewOptions()
+  // Lazy initializer: the scenario config is only built for the first render.
+  const [config, setConfig] = useState<WorkbenchConfig>(() =>
+    getScenarioConfig(viewOptions.scenario),
+  )
+  const [expandedView, setExpandedView] = useState<'cluster' | null>(null)
+  const analysis = useMemo(
+    () => analyzeCluster(config.model, config.training, config.cluster, config.parallelism),
+    [config],
+  )
+  const viewModel = useMemo(
+    () => buildWorkbenchViewModel(config, analysis),
+    [analysis, config],
+  )
+  // While the overlay is open: lock page scrolling and let Escape close it.
+  // The cleanup restores the previous overflow value and removes the listener.
+  useEffect(() => {
+    if (!expandedView) {
+      return undefined
+    }
+    const previousOverflow = document.body.style.overflow
+    document.body.style.overflow = 'hidden'
+    const handleKeyDown = (event: KeyboardEvent) => {
+      if (event.key === 'Escape') {
+        setExpandedView(null)
+      }
+    }
+    window.addEventListener('keydown', handleKeyDown)
+    return () => {
+      document.body.style.overflow = previousOverflow
+      window.removeEventListener('keydown', handleKeyDown)
+    }
+  }, [expandedView])
+  const handleConfigChange = (nextConfig: WorkbenchConfig) => {
+    setConfig(nextConfig)
+  }
+  // Reset returns to the scenario selected by the view options, not to a fixed default.
+  const handleReset = () => {
+    setConfig(getScenarioConfig(viewOptions.scenario))
+  }
+  // The same panel is rendered either inline or inside the overlay, never both.
+  const clusterView = (
+    <section className="map-panel">
+      <div className="topology-header">
+        <div>
+          <p className="mini-label">Live cluster topology</p>
+          <h2>GPU fabric map</h2>
+        </div>
+        <div className="topology-header-actions">
+          <button
+            type="button"
+            className="scene-button"
+            onClick={() => setExpandedView('cluster')}
+          >
+            open full screen
+          </button>
+        </div>
+      </div>
+      <ClusterMap
+        viewModel={viewModel}
+        debugEnabled={viewOptions.debug}
+        snapshotMode={viewOptions.snapshot}
+        linkedFocus={null}
+      />
+    </section>
+  )
+  return (
+    <div className="workbench-shell">
+      <header className="app-topbar">
+        <div className="title-block">
+          <p className="mini-label">Illustrated training cluster</p>
+          <h1>[WIP] Parallelism workbench</h1>
+          <p className="title-copy">{viewModel.subheadline}</p>
+        </div>
+        {!analysis.feasible ? (
+          <div className="status-banner status-banner-danger" data-testid="infeasible-banner">
+            <strong>Infeasible configuration</strong>
+            <span>{analysis.infeasibilityReason}</span>
+          </div>
+        ) : null}
+        <section className="summary-strip" aria-label="simulation summary">
+          <div className="summary-card summary-card-wide">
+            <span>Scenario</span>
+            <strong>{viewModel.headline}</strong>
+            <p>
+              {config.cluster.numNodes} {config.cluster.nodeLabel ?? 'nodes'} · {config.cluster.gpuType.name}
+              {' · '}
+              {config.model.numLayers} layers · hidden {config.model.hiddenDim.toLocaleString()}
+            </p>
+          </div>
+          <div className="summary-card">
+            <span>Throughput</span>
+            <strong>{viewModel.summary.throughputLabel}</strong>
+            <p>{viewModel.summary.throughputNote}</p>
+          </div>
+          <div className="summary-card">
+            <span>Active GPUs</span>
+            <strong>{viewModel.summary.gpuLabel}</strong>
+            <p>{viewModel.summary.gpuNote}</p>
+          </div>
+          <div className="summary-card">
+            <span>Interconnect</span>
+            <strong>{viewModel.summary.interconnectLabel}</strong>
+            <p>{viewModel.summary.interconnectNote}</p>
+          </div>
+          <div className="summary-card">
+            <span>Bottleneck</span>
+            <strong>{viewModel.summary.bottleneckLabel}</strong>
+            <p>{viewModel.summary.bottleneckNote}</p>
+          </div>
+        </section>
+      </header>
+      <ControlsPanel
+        config={config}
+        onChange={handleConfigChange}
+        onReset={handleReset}
+        viewModel={viewModel}
+      />
+      <main className="analysis-stack">
+        {expandedView !== 'cluster' ? clusterView : null}
+        <section className="side-card">
+          <div className="side-header">
+            <p className="mini-label">Run breakdown</p>
+            <h3>{config.cluster.gpuType.name}</h3>
+          </div>
+          <div className="facts-grid">
+            {viewModel.facts.map((fact) => (
+              <div key={fact.label} className="fact-row">
+                <span>{fact.label}</span>
+                <strong>{fact.value}</strong>
+              </div>
+            ))}
+          </div>
+          <div className="warning-list" aria-live="polite">
+            {viewModel.warnings.map((warning) => (
+              <div key={warning} className="warning-pill">
+                {warning}
+              </div>
+            ))}
+          </div>
+        </section>
+      </main>
+      {expandedView ? (
+        <div
+          className="fullscreen-overlay"
+          role="dialog"
+          aria-modal="true"
+          aria-labelledby="expanded-view-title"
+          onClick={(event) => {
+            if (event.target === event.currentTarget) {
+              setExpandedView(null)
+            }
+          }}
+        >
+          <div className="fullscreen-shell">
+            <div className="fullscreen-toolbar">
+              <div>
+                <p className="mini-label">Expanded view</p>
+                <h2 id="expanded-view-title">GPU fabric map</h2>
+              </div>
+              <button
+                type="button"
+                className="scene-button"
+                onClick={() => setExpandedView(null)}
+              >
+                close full screen
+              </button>
+            </div>
+            <div className="fullscreen-content">
+              {clusterView}
+            </div>
+          </div>
+        </div>
+      ) : null}
+    </div>
+  )
+}
+export default App

src/components/ClusterMap.tsx ADDED Viewed

	@@ -0,0 +1,2086 @@

+import {
+  useApplication,
+  useExtend,
+  useTick,
+} from '@pixi/react'
+import {
+  Container,
+  Graphics,
+  Text,
+  Ticker,
+  type Graphics as PixiGraphics,
+} from 'pixi.js'
+import {
+  useCallback,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+  type PointerEvent as ReactPointerEvent,
+} from 'react'
+import { PixiSurface } from './pixi/PixiSurface'
+import {
+  buildTopologySceneModel,
+  describeTarget,
+  findHoverTarget,
+  getFitViewport,
+  worldToScreen,
+  type HoverTarget,
+  type SceneGpu,
+  type SceneNode,
+  type TargetDetails,
+  type TopologySceneModel,
+  type ViewportState,
+} from '../lib/topologyScene'
+import { matchesLinkedFocus, type LinkedFocus } from '../lib/linkedFocus'
+import { type WorkbenchViewModel } from '../lib/workbenchPresenter'
+import {
+  TOPOLOGY_LOD_POLICY,
+  getTopologyLodState,
+  mix,
+  screenStroke,
+  screenWorld,
+  type TopologyLodState,
+} from '../lib/topologyLod'
+// Props accepted by the ClusterMap component.
+type ClusterMapProps = {
+  // View model type exported by ../lib/workbenchPresenter.
+  viewModel: WorkbenchViewModel
+  // NOTE(review): presumably gates the in-app debug overlay; confirm against the component body.
+  debugEnabled: boolean
+  // NOTE(review): presumably freezes animation for deterministic screenshots; confirm.
+  snapshotMode: boolean
+  // Externally supplied focus, or null when nothing is linked.
+  linkedFocus: LinkedFocus | null
+}
+// Independent on/off switches for the debug layers.
+type DebugToggles = {
+  bounds: boolean
+  ids: boolean
+  heat: boolean
+  hitAreas: boolean
+  stats: boolean
+}
+// A pointer position as an x/y pair.
+type ScenePointer = {
+  x: number
+  y: number
+}
+// Rectangles keyed by string id; createDebugObjectMap fills this with the
+// screen-projected hit bounds of pods, nodes and GPUs.
+type DebugObjectMap = Record<
+  string,
+  {
+    x: number
+    y: number
+    width: number
+    height: number
+  }
+>
+const MIN_SCALE = TOPOLOGY_LOD_POLICY.minScale
+const MAX_SCALE = TOPOLOGY_LOD_POLICY.maxScale
+/** Restricts `value` to the range [`min`, `max`]; the upper bound is applied last. */
+const clamp = (value: number, min: number, max: number) => {
+  const raised = Math.max(value, min)
+  return Math.min(raised, max)
+}
+// Allowed zoom range and pan-offset range for the viewport, as returned by
+// getViewportConstraints.
+type ViewportConstraints = {
+  // Smallest and largest permitted zoom factor.
+  minScale: number
+  maxScale: number
+  // Permitted range for the viewport's x offset.
+  minX: number
+  maxX: number
+  // Permitted range for the viewport's y offset.
+  minY: number
+  maxY: number
+}
+/**
+ * Computes the zoom limits and pan limits for a scene shown in a
+ * `width` x `height` surface at the requested `scale`.
+ *
+ * The minimum zoom is the fit-to-view scale. The requested scale is clamped
+ * into the zoom range before the pan limits are derived. On each axis, a
+ * scene that fits inside the surface is pinned to the centred offset;
+ * otherwise the offset may range from `surface - scaledScene` up to 0.
+ */
+const getViewportConstraints = (
+  model: TopologySceneModel,
+  width: number,
+  height: number,
+  scale: number,
+): ViewportConstraints => {
+  const { scale: minScale } = getFitViewport(model, width, height)
+  const zoomCeiling = Math.max(minScale * 180, minScale + 0.001)
+  const maxScale = clamp(zoomCeiling, minScale, MAX_SCALE)
+  const effectiveScale = clamp(scale, minScale, maxScale)
+  // Both axes follow the same rule, so it is expressed once.
+  const axisRange = (surfaceExtent: number, sceneExtent: number): [number, number] => {
+    const scaledExtent = sceneExtent * effectiveScale
+    if (scaledExtent <= surfaceExtent) {
+      const centered = (surfaceExtent - scaledExtent) / 2
+      return [centered, centered]
+    }
+    return [surfaceExtent - scaledExtent, 0]
+  }
+  const [minX, maxX] = axisRange(width, model.width)
+  const [minY, maxY] = axisRange(height, model.height)
+  return { minScale, maxScale, minX, maxX, minY, maxY }
+}
+/**
+ * Returns `nextViewport` with its scale and offsets clamped to the limits
+ * reported by `getViewportConstraints`.
+ *
+ * A surface with a non-positive width or height has no meaningful limits, so
+ * the viewport is returned untouched in that case.
+ */
+const clampViewportToScene = (
+  nextViewport: ViewportState,
+  model: TopologySceneModel,
+  width: number,
+  height: number,
+): ViewportState => {
+  if (width <= 0 || height <= 0) {
+    return nextViewport
+  }
+  // getViewportConstraints clamps the scale it receives before deriving the
+  // pan limits, so a single call already yields the limits for the clamped
+  // scale; calling it again with that scale would repeat the same work.
+  const constraints = getViewportConstraints(model, width, height, nextViewport.scale)
+  return {
+    scale: clamp(nextViewport.scale, constraints.minScale, constraints.maxScale),
+    x: clamp(nextViewport.x, constraints.minX, constraints.maxX),
+    y: clamp(nextViewport.y, constraints.minY, constraints.maxY),
+  }
+}
+/** Draw callback that only clears the graphics object and draws nothing. */
+const noopDraw = (graphics: PixiGraphics): void => {
+  graphics.clear()
+}
+/**
+ * Sine-based multiplier centred on 1: `1 + sin(phase) * depth`, where the
+ * phase is `timeMs / 1000 * 1.8 + offset`.
+ */
+const pulse = (timeMs: number, offset: number, depth: number) => {
+  const phase = timeMs / 1000 * 1.8 + offset
+  return 1 + Math.sin(phase) * depth
+}
+/**
+ * Strokes four L-shaped corner brackets around `bounds`.
+ *
+ * Arm length, outward inset and line width are each passed through
+ * `screenStroke` with the current `scale`. Brackets are drawn in the order
+ * top-left, top-right, bottom-left, bottom-right, each as its own stroked path.
+ */
+const drawCornerFocus = (
+  graphics: PixiGraphics,
+  bounds: { x: number; y: number; width: number; height: number },
+  scale: number,
+  color: number,
+  alpha: number,
+  lengthPx: number,
+  insetPx: number,
+  strokePx: number,
+) => {
+  const arm = screenStroke(scale, lengthPx, 0.3, 16)
+  const gap = screenStroke(scale, insetPx, 0.12, 8)
+  const lineWidth = screenStroke(scale, strokePx, 0.08, 2.4)
+  const x0 = bounds.x - gap
+  const y0 = bounds.y - gap
+  const x1 = bounds.x + bounds.width + gap
+  const y1 = bounds.y + bounds.height + gap
+  // Each row is one bracket: start point, corner point, end point.
+  const brackets: Array<[number, number, number, number, number, number]> = [
+    [x0, y0 + arm, x0, y0, x0 + arm, y0],
+    [x1 - arm, y0, x1, y0, x1, y0 + arm],
+    [x0, y1 - arm, x0, y1, x0 + arm, y1],
+    [x1 - arm, y1, x1, y1, x1, y1 - arm],
+  ]
+  for (const [startX, startY, cornerX, cornerY, endX, endY] of brackets) {
+    graphics
+      .moveTo(startX, startY)
+      .lineTo(cornerX, cornerY)
+      .lineTo(endX, endY)
+      .stroke({ color, alpha, width: lineWidth, cap: 'square', join: 'miter' })
+  }
+}
+/**
+ * Builds a lookup from object id to the screen-space projection of its hit
+ * bounds, covering every pod, every node and every GPU of every node.
+ * Entries are added in that order, so on an id collision the later one wins.
+ */
+function createDebugObjectMap(
+  model: TopologySceneModel,
+  viewport: ViewportState,
+): DebugObjectMap {
+  const entries: Array<[PropertyKey, DebugObjectMap[string]]> = []
+  for (const pod of model.pods) {
+    entries.push([pod.id, worldToScreen(pod.hitBounds, viewport)])
+  }
+  for (const node of model.nodes) {
+    entries.push([node.id, worldToScreen(node.hitBounds, viewport)])
+  }
+  for (const node of model.nodes) {
+    for (const gpu of node.gpus) {
+      entries.push([gpu.id, worldToScreen(gpu.hitBounds, viewport)])
+    }
+  }
+  return Object.fromEntries(entries)
+}
+/**
+ * Forwards to `screenWorld`, supplying default bounds of 0.06 and 12 when the
+ * caller does not pass `minWorld` / `maxWorld`.
+ */
+const screenRadius = (
+  scale: number,
+  pixels: number,
+  minWorld = 0.06,
+  maxWorld = 12,
+) => {
+  return screenWorld(scale, pixels, minWorld, maxWorld)
+}
+/** Packs a position and a size into a plain `{ x, y, width, height }` object. */
+const makeRect = (x: number, y: number, width: number, height: number) => {
+  return { x, y, width, height }
+}
+/**
+ * Returns `rect` shrunk by `insetX` on the left and right and by `insetY` on
+ * the top and bottom. Width and height never drop below 0.0001.
+ */
+const insetRect = (
+  rect: { x: number; y: number; width: number; height: number },
+  insetX: number,
+  insetY: number,
+) => {
+  const shrunkWidth = Math.max(rect.width - insetX * 2, 0.0001)
+  const shrunkHeight = Math.max(rect.height - insetY * 2, 0.0001)
+  return makeRect(rect.x + insetX, rect.y + insetY, shrunkWidth, shrunkHeight)
+}
+/**
+ * Returns the rectangle obtained by dividing the surface size and the negated
+ * viewport offset by the viewport scale, grown by `paddingWorld` on every side.
+ */
+const getWorldViewportBounds = (
+  viewport: ViewportState,
+  width: number,
+  height: number,
+  paddingWorld: number,
+) => {
+  const { x: offsetX, y: offsetY, scale: zoom } = viewport
+  const left = -offsetX / zoom - paddingWorld
+  const top = -offsetY / zoom - paddingWorld
+  const spanX = width / zoom + paddingWorld * 2
+  const spanY = height / zoom + paddingWorld * 2
+  return makeRect(left, top, spanX, spanY)
+}
+/**
+ * Reports whether two axis-aligned rectangles overlap. Rectangles that merely
+ * touch along an edge or at a corner count as intersecting.
+ */
+const rectsIntersect = (
+  left: { x: number; y: number; width: number; height: number },
+  right: { x: number; y: number; width: number; height: number },
+) => {
+  const overlapsOnX = left.x <= right.x + right.width && left.x + left.width >= right.x
+  const overlapsOnY = left.y <= right.y + right.height && left.y + left.height >= right.y
+  return overlapsOnX && overlapsOnY
+}
+/**
+ * Bounding rectangle of the segment (x1, y1) -> (x2, y2), expanded by `pad`
+ * on all four sides. The endpoints may be given in either order.
+ */
+const lineBounds = (
+  x1: number,
+  y1: number,
+  x2: number,
+  y2: number,
+  pad: number,
+) => {
+  const minX = Math.min(x1, x2)
+  const minY = Math.min(y1, y2)
+  const spanX = Math.abs(x2 - x1)
+  const spanY = Math.abs(y2 - y1)
+  return makeRect(minX - pad, minY - pad, spanX + pad * 2, spanY + pad * 2)
+}
+function drawModule( // Paints one GPU module into `graphics`: optional glow, the shell, then detail tiers gated by LOD weights and projected size.
+  graphics: PixiGraphics,
+  gpu: SceneGpu, // only lodFrame, active and utilization are read here
+  scale: number, // multiplies frame sizes into projected sizes; also forwarded to the screen* helpers
+  linked: boolean, // switches the shell outline to the highlight colour with a stronger alpha and stroke
+  lod: TopologyLodState, // only lod.weights (overview/board/package/silicon/micro) is read here
+  emphasis: number, // multiplied into every alpha this function emits
+) {
+  const outer = gpu.lodFrame // every nested rect below is derived from this frame
+  const projectedOuterWidth = outer.width * scale
+  const projectedOuterHeight = outer.height * scale
+  const activeLoad = gpu.active ? mix(0.42, 1, gpu.utilization) : 0 // exactly 0 for inactive GPUs; mix is presumably a lerp — confirm in its definition
+  const shell = insetRect(outer, outer.width * 0.04, outer.height * 0.06) // each layer below is inset from the previous one by a fraction of that layer's size
+  const carrier = insetRect(shell, shell.width * 0.05, shell.height * 0.08)
+  const coldPlate = insetRect(carrier, carrier.width * 0.14, carrier.height * 0.18)
+  const packageFrame = insetRect(coldPlate, coldPlate.width * 0.1, coldPlate.height * 0.13)
+  const substrate = insetRect(packageFrame, packageFrame.width * 0.06, packageFrame.height * 0.1)
+  const interposer = insetRect(substrate, substrate.width * 0.1, substrate.height * 0.14)
+  const die = insetRect(interposer, interposer.width * 0.2, interposer.height * 0.2)
+  const dieGrid = insetRect(die, die.width * 0.04, die.height * 0.05)
+  const connectorStrip = makeRect(
+    shell.x + shell.width * 0.24,
+    shell.y + shell.height * 0.82,
+    shell.width * 0.52,
+    shell.height * 0.08,
+  )
+  const boardStroke = linked ? 0xffefc0 : 0xcfdbe2
+  const overview = Math.max(lod.weights.overview - lod.weights.board * 0.18, 0) // tier alphas: each weight minus a fraction of the next-finer tier's weight, floored at 0
+  const board = Math.max(lod.weights.board - lod.weights.package * 0.42, 0)
+  const packageAlpha = Math.max(lod.weights.package - lod.weights.silicon * 0.52, 0)
+  const siliconAlpha = Math.max(lod.weights.silicon - lod.weights.micro * 0.4, 0)
+  const microAlpha = lod.weights.micro // finest tier: nothing is subtracted from its weight
+  const boardPresence = Math.max(
+    lod.weights.board,
+    lod.weights.package * 0.84,
+    lod.weights.silicon * 0.66,
+  )
+  const coldPlatePresence = Math.max(board * 0.7, packageAlpha * 0.88, siliconAlpha * 0.9, microAlpha * 0.8)
+  const shellAlpha = mix(gpu.active ? 0.84 : 0.42, gpu.active ? 0.96 : 0.56, boardPresence)
+  const frameAlpha = emphasis * (linked ? 0.92 : 0.56)
+  const boardStrokeWidth = screenStroke(scale, linked ? 1.25 : 0.9, 0.08, 0.95)
+  const detailStroke = screenStroke(scale, 0.6, 0.03, 0.5)
+  const boardCorner = screenRadius(scale, 8, 0.18, 2.6)
+  const innerCorner = screenRadius(scale, 5, 0.16, 2)
+  const dieCorner = screenRadius(scale, 4, 0.14, 1.5)
+  const renderCarrier = projectedOuterWidth >= 10 && projectedOuterHeight >= 8 // size/alpha gates; all evaluated up front, before the early returns below
+  const renderColdPlate = projectedOuterWidth >= 14 && projectedOuterHeight >= 10
+  const renderOverviewGlyph = overview > 0.02 && projectedOuterWidth >= 10
+  const renderConnectorStrip = (overview > 0.02 || board > 0.02) && projectedOuterWidth >= 15
+  const renderBoardTier = board > 0.03 && projectedOuterWidth >= 18
+  const renderPackageTier = packageAlpha > 0.04 && projectedOuterWidth >= 30
+  const renderSiliconTier = siliconAlpha > 0.05 && die.width * scale >= 26 // silicon and micro gate on the projected die width rather than the outer frame
+  const renderMicroTier = microAlpha > 0.06 && die.width * scale >= 72
+  const glowFrame = makeRect(
+    shell.x - outer.width * 0.035,
+    shell.y - outer.height * 0.05,
+    shell.width + outer.width * 0.07,
+    shell.height + outer.height * 0.1,
+  )
+  if (activeLoad > 0.001) { // glow halo slightly larger than the shell, painted before it
+    graphics
+      .roundRect(
+        glowFrame.x,
+        glowFrame.y,
+        glowFrame.width,
+        glowFrame.height,
+        screenRadius(scale, 10, 0.22, 3),
+      )
+      .fill({
+        color: 0x59e7d2,
+        alpha:
+          emphasis *
+          mix(
+            projectedOuterWidth < 18 ? 0.08 : 0.04, // both ends of the alpha range double when the module projects under 18 wide
+            projectedOuterWidth < 18 ? 0.2 : 0.1,
+            activeLoad,
+          ),
+      })
+  }
+  graphics // base shell: drawn for every module regardless of size
+    .roundRect(shell.x, shell.y, shell.width, shell.height, boardCorner)
+    .fill({ color: gpu.active ? 0x0d1f29 : 0x0b1821, alpha: shellAlpha * emphasis })
+    .stroke({ color: boardStroke, alpha: frameAlpha, width: boardStrokeWidth })
+  if (projectedOuterWidth < 8 || projectedOuterHeight < 6) { // tiny module: optional centred signal chip, then stop
+    if (activeLoad > 0.001) {
+      const signalWidth = Math.min(
+        shell.width * 0.54,
+        screenWorld(scale, 5.6, 0.14, shell.width * 0.54),
+      )
+      const signalHeight = Math.min(
+        shell.height * 0.34,
+        screenWorld(scale, 2.8, 0.1, shell.height * 0.34),
+      )
+      const signalX = shell.x + (shell.width - signalWidth) / 2
+      const signalY = shell.y + (shell.height - signalHeight) / 2
+      graphics
+        .roundRect(
+          signalX,
+          signalY,
+          signalWidth,
+          signalHeight,
+          screenRadius(scale, 2.2, 0.05, 0.34),
+        )
+        .fill({
+          color: 0x76f1df,
+          alpha: emphasis * mix(0.68, 1, activeLoad),
+        })
+    }
+    return
+  }
+  if (projectedOuterWidth < 15 || projectedOuterHeight < 10) { // small module: a single core block, then stop
+    const core = insetRect(shell, shell.width * 0.3, shell.height * 0.28)
+    graphics
+      .roundRect(
+        core.x,
+        core.y,
+        core.width,
+        core.height,
+        screenRadius(scale, 1.8, 0.04, 0.4),
+      )
+      .fill({
+        color: gpu.active ? 0x6ce9d7 : 0x193843,
+        alpha: emphasis * (gpu.active ? mix(0.6, 0.95, activeLoad) : 0.36),
+      })
+    return
+  }
+  if (renderCarrier) { // NOTE(review): past the early returns width >= 15 and height >= 10 already hold for finite sizes, so this gate looks redundant
+    graphics
+      .roundRect(carrier.x, carrier.y, carrier.width, carrier.height, innerCorner)
+      .fill({
+        color: gpu.active ? 0x112833 : 0x10202a,
+        alpha: mix(0.56, 0.82, boardPresence) * emphasis,
+      })
+  }
+  if (renderColdPlate) { // cold-plate fill; the final factor dims it by up to 24% as microAlpha rises
+    graphics
+      .roundRect(
+        coldPlate.x,
+        coldPlate.y,
+        coldPlate.width,
+        coldPlate.height,
+        screenRadius(scale, 4.5, 0.12, 1.8),
+      )
+      .fill({
+        color: 0x163643,
+        alpha:
+          mix(0.02, 0.34, coldPlatePresence) *
+          emphasis *
+          Math.max(1 - microAlpha * 0.24, 0.76),
+      })
+  }
+  if (renderConnectorStrip) { // six evenly spaced pads, centred within connectorStrip
+    const connectorAlpha = Math.max(overview * 0.8, board * 0.55) * emphasis * (gpu.active ? 0.84 : 0.36)
+    const padCount = 6
+    const padWidth = connectorStrip.width * 0.11
+    const padGap = connectorStrip.width * 0.05
+    const totalWidth = padCount * padWidth + (padCount - 1) * padGap
+    const padStart = connectorStrip.x + (connectorStrip.width - totalWidth) / 2
+    for (let index = 0; index < padCount; index += 1) {
+      const padX = padStart + index * (padWidth + padGap)
+      graphics
+        .roundRect(
+          padX,
+          connectorStrip.y,
+          padWidth,
+          connectorStrip.height,
+          screenRadius(scale, 2, 0.04, 0.6),
+        )
+        .fill({ color: 0xd6ba72, alpha: connectorAlpha })
+    }
+  }
+  if (renderOverviewGlyph) { // overview glyph: module window, two side bars, then a dark die window on top
+    const moduleWindow = insetRect(carrier, carrier.width * 0.24, carrier.height * 0.26)
+    const dieWindow = makeRect(
+      moduleWindow.x + moduleWindow.width * 0.31,
+      moduleWindow.y + moduleWindow.height * 0.26,
+      moduleWindow.width * 0.38,
+      moduleWindow.height * 0.48,
+    )
+    graphics
+      .roundRect(
+        moduleWindow.x,
+        moduleWindow.y,
+        moduleWindow.width,
+        moduleWindow.height,
+        screenRadius(scale, 2.8, 0.06, 0.9),
+      )
+      .fill({
+        color: gpu.active ? 0x235560 : 0x1a3d48,
+        alpha: overview * emphasis * mix(gpu.active ? 0.5 : 0.42, gpu.active ? 0.82 : 0.42, activeLoad),
+      })
+    for (const x of [
+      moduleWindow.x + moduleWindow.width * 0.14,
+      moduleWindow.x + moduleWindow.width * 0.76,
+    ]) {
+      graphics
+        .roundRect(
+          x,
+          moduleWindow.y + moduleWindow.height * 0.28,
+          moduleWindow.width * 0.08,
+          moduleWindow.height * 0.44,
+          screenRadius(scale, 1.3, 0.03, 0.35),
+        )
+        .fill({
+          color: gpu.active ? 0xdaf08e : 0xcddd73,
+          alpha: overview * emphasis * mix(gpu.active ? 0.8 : 0.62, 1, activeLoad * 0.7),
+        })
+    }
+    graphics
+      .roundRect(
+        dieWindow.x,
+        dieWindow.y,
+        dieWindow.width,
+        dieWindow.height,
+        screenRadius(scale, 1.7, 0.03, 0.42),
+      )
+      .fill({
+        color: gpu.active ? 0x0b1820 : 0x081219,
+        alpha: overview * emphasis * mix(gpu.active ? 0.92 : 0.86, 1, activeLoad * 0.4),
+      })
+  }
+  if (renderBoardTier) { // board tier: cold-plate outline, four mount dots, and a live zone when loaded
+    graphics
+      .roundRect(
+        coldPlate.x,
+        coldPlate.y,
+        coldPlate.width,
+        coldPlate.height,
+        screenRadius(scale, 4.5, 0.1, 1.2),
+      )
+      .stroke({
+        color: 0x88b9c6,
+        alpha: board * emphasis * 0.34,
+        width: detailStroke,
+      })
+    const mountRadius = screenWorld(scale, 2.6, 0.03, 0.26)
+    const mountAlpha = board * emphasis * (gpu.active ? 0.32 : 0.14)
+    for (const [x, y] of [
+      [carrier.x + carrier.width * 0.16, carrier.y + carrier.height * 0.2],
+      [carrier.x + carrier.width * 0.84, carrier.y + carrier.height * 0.2],
+      [carrier.x + carrier.width * 0.16, carrier.y + carrier.height * 0.74],
+      [carrier.x + carrier.width * 0.84, carrier.y + carrier.height * 0.74],
+    ]) {
+      graphics.circle(x, y, mountRadius).fill({ color: 0x8ab7b7, alpha: mountAlpha })
+    }
+    if (activeLoad > 0.001) {
+      const liveZone = insetRect(coldPlate, coldPlate.width * 0.3, coldPlate.height * 0.28)
+      graphics
+        .roundRect(
+          liveZone.x,
+          liveZone.y,
+          liveZone.width,
+          liveZone.height,
+          screenRadius(scale, 3, 0.06, 0.8),
+        )
+        .fill({
+          color: 0x64e6d4,
+          alpha: board * emphasis * mix(0.12, 0.28, activeLoad),
+        })
+    }
+  }
+  if (renderPackageTier) { // package tier: package outline, substrate, interposer, hbm blocks (4 columns x 2 rows), then the die
+    graphics
+      .roundRect(packageFrame.x, packageFrame.y, packageFrame.width, packageFrame.height, innerCorner)
+      .stroke({ color: 0xb7c7cd, alpha: packageAlpha * emphasis * 0.8, width: detailStroke })
+    graphics
+      .roundRect(substrate.x, substrate.y, substrate.width, substrate.height, innerCorner)
+      .fill({ color: 0x294546, alpha: packageAlpha * emphasis * 0.34 })
+    graphics
+      .roundRect(interposer.x, interposer.y, interposer.width, interposer.height, innerCorner)
+      .fill({ color: 0x2a5960, alpha: packageAlpha * emphasis * 0.3 })
+      .stroke({ color: 0x9deedb, alpha: packageAlpha * emphasis * 0.18, width: detailStroke })
+    const hbmWidth = interposer.width * 0.18
+    const hbmHeight = interposer.height * 0.16
+    for (let index = 0; index < 4; index += 1) {
+      const hbmX = interposer.x + interposer.width * 0.04 + index * (hbmWidth + interposer.width * 0.03)
+      for (const y of [interposer.y + interposer.height * 0.09, interposer.y + interposer.height * 0.75]) {
+        graphics
+          .roundRect(
+            hbmX,
+            y,
+            hbmWidth,
+            hbmHeight,
+            screenRadius(scale, 2, 0.04, 0.45),
+          )
+          .fill({ color: 0xcfd86f, alpha: packageAlpha * emphasis * 0.7 })
+      }
+    }
+    graphics
+      .roundRect(die.x, die.y, die.width, die.height, dieCorner)
+      .fill({ color: 0x09161d, alpha: packageAlpha * emphasis * 0.76 })
+      .stroke({ color: 0x8bdacd, alpha: packageAlpha * emphasis * 0.24, width: detailStroke })
+  }
+  if (renderSiliconTier) { // silicon tier: die tint, a 7x5 tile grid, and two side blocks
+    graphics
+      .roundRect(die.x, die.y, die.width, die.height, dieCorner)
+      .fill({ color: 0x0c1c22, alpha: siliconAlpha * emphasis * 0.58 })
+    const tileColumns = 7
+    const tileRows = 5
+    const tileWidth = dieGrid.width / tileColumns
+    const tileHeight = dieGrid.height / tileRows
+    for (let row = 0; row < tileRows; row += 1) {
+      for (let column = 0; column < tileColumns; column += 1) {
+        const tileX = dieGrid.x + column * tileWidth
+        const tileY = dieGrid.y + row * tileHeight
+        const tileFill = // first column gets its own colour; top and bottom rows share another
+          column === 0
+            ? 0xa2d8ec
+            : row === 0 || row === tileRows - 1
+              ? 0x7fb7ca
+              : 0xb8ece2
+        graphics
+          .roundRect(
+            tileX + tileWidth * 0.08,
+            tileY + tileHeight * 0.12,
+            tileWidth * 0.8,
+            tileHeight * 0.72,
+            screenRadius(scale, 1.2, 0.03, 0.26),
+          )
+          .fill({ color: tileFill, alpha: siliconAlpha * emphasis * (column === 0 ? 0.22 : 0.14) })
+      }
+    }
+    for (const block of [
+      makeRect(die.x + die.width * 0.06, die.y + die.height * 0.18, die.width * 0.14, die.height * 0.64),
+      makeRect(die.x + die.width * 0.78, die.y + die.height * 0.26, die.width * 0.1, die.height * 0.48),
+    ]) {
+      graphics
+        .roundRect(
+          block.x,
+          block.y,
+          block.width,
+          block.height,
+          screenRadius(scale, 1.2, 0.03, 0.3),
+        )
+        .fill({ color: 0xaee6ff, alpha: siliconAlpha * emphasis * 0.14 })
+    }
+  }
+  if (renderMicroTier) { // micro tier: 38x24 cell grid, i.e. 912 roundRect fills per module
+    const cellColumns = 38
+    const cellRows = 24
+    const cellWidth = dieGrid.width / cellColumns
+    const cellHeight = dieGrid.height / cellRows
+    const cellAlpha = microAlpha * emphasis * 0.22
+    for (let row = 0; row < cellRows; row += 1) {
+      for (let column = 0; column < cellColumns; column += 1) {
+        const x = dieGrid.x + column * cellWidth
+        const y = dieGrid.y + row * cellHeight
+        const edgeZone = column < 4 || column > cellColumns - 5 || row < 2 || row > cellRows - 3 // outer 4 columns and outer 2 rows on each side
+        const seam = column % 6 === 0 || row % 5 === 0 // every 6th column and every 5th row
+        const primaryColor = edgeZone
+          ? 0x79afbd
+          : seam
+            ? 0x91d2dc
+            : (row + column) % 5 === 0
+              ? 0xc7fff0
+              : (row + column) % 3 === 0
+                ? 0x94d9ef
+                : 0xafe9dc
+        graphics
+          .roundRect(
+            x + cellWidth * 0.12,
+            y + cellHeight * 0.16,
+            cellWidth * 0.72,
+            cellHeight * 0.56,
+            screenRadius(scale, 0.18, 0.002, 0.05),
+          )
+          .fill({ color: primaryColor, alpha: cellAlpha * (seam ? 0.58 : 1) }) // seam cells are dimmed to 58% of cellAlpha
+      }
+    }
+  }
+}
+/**
+ * Paints the backing plate for one node: an outlined base rectangle, an
+ * optional inner fill, and an optional heat tint.
+ *
+ * @param graphics target that receives all draw calls
+ * @param node supplies the rectangle (x, y, width, height) and interNodeLoad
+ * @param scale forwarded to screenRadius/screenStroke to size corners and strokes
+ * @param linked when true the outline uses the highlight colour at a fixed 0.82 alpha
+ * @param heatEnabled when true a tint scaled by node.interNodeLoad is added on top
+ * @param lod only lod.weights.board is read
+ * @param emphasis multiplier applied to the unlinked outline, inner fill and heat alphas
+ */
+function drawNodeShell(
+  graphics: PixiGraphics,
+  node: SceneNode,
+  scale: number,
+  linked: boolean,
+  heatEnabled: boolean,
+  lod: TopologyLodState,
+  emphasis: number,
+) {
+  const { x, y, width, height } = node
+  const boardWeight = lod.weights.board
+  const innerFillAlpha = mix(0.04, 0.14, boardWeight) * emphasis
+  const unlinkedOutlineAlpha = mix(0.08, 0.22, boardWeight) * emphasis
+  const outerCorner = screenRadius(scale, 18, 0.8, 10)
+
+  // Base plate. Its fill alpha is a constant and is not scaled by emphasis.
+  const outline = linked
+    ? { color: 0xffdc8a, alpha: 0.82, width: screenStroke(scale, 1.2, 0.08, 0.85) }
+    : { color: 0x6fd9cd, alpha: unlinkedOutlineAlpha, width: screenStroke(scale, 0.7, 0.08, 0.85) }
+  graphics
+    .roundRect(x, y, width, height, outerCorner)
+    .fill({ color: 0x09131b, alpha: 0.86 })
+    .stroke(outline)
+
+  // Inner fill, skipped while it would be nearly invisible.
+  if (innerFillAlpha > 0.02) {
+    const innerCorner = screenRadius(scale, 14, 0.6, 8)
+    graphics
+      .roundRect(x + 2.5, y + 2.5, width - 5, height - 5, innerCorner)
+      .fill({ color: 0x0b1720, alpha: innerFillAlpha })
+  }
+
+  // Heat tint; uses a fixed corner radius of 8 rather than a scale-derived one.
+  if (heatEnabled) {
+    const heatAlpha = node.interNodeLoad * 0.08 * emphasis
+    graphics
+      .roundRect(x + 6, y + 6, width - 12, height - 12, 8)
+      .fill({ color: 0xe58a43, alpha: heatAlpha })
+  }
+}
+/**
+ * Paints the campus-level layer: one fabric spine per pod row and per pod
+ * column, followed by a backing rectangle for every visible pod.
+ *
+ * `model.pods` is indexed as a row-major grid of `podRows` x `podColumns`.
+ *
+ * @param graphics target that receives all draw calls
+ * @param model supplies pods, podRows and podColumns
+ * @param scale forwarded to screenStroke/screenRadius to size strokes and corners
+ * @param lod only lod.weights.overview and lod.weights.board are read
+ * @param visiblePods pods that get a backing rectangle; spines use model.pods instead
+ * @param podEmphasis alpha multiplier looked up by pod id
+ */
+function drawCampusPods(
+  graphics: PixiGraphics,
+  model: TopologySceneModel,
+  scale: number,
+  lod: TopologyLodState,
+  visiblePods: typeof model.pods,
+  podEmphasis: (podId: string) => number,
+) {
+  const { pods, podRows, podColumns } = model
+  const spineAlpha = mix(0.02, 0.08, lod.weights.overview)
+
+  // Strokes a straight spine between two pod centres, dimmed to the weaker end's emphasis.
+  const strokeSpine = (
+    first: (typeof pods)[number],
+    last: (typeof pods)[number],
+    pixelWidth: number,
+    maxWorldWidth: number,
+  ) => {
+    graphics
+      .moveTo(first.centerX, first.centerY)
+      .lineTo(last.centerX, last.centerY)
+      .stroke({
+        color: 0xf1b067,
+        alpha: spineAlpha * Math.min(podEmphasis(first.id), podEmphasis(last.id)),
+        width: screenStroke(scale, pixelWidth, 0.12, maxWorldWidth),
+      })
+  }
+
+  // Horizontal spines: first pod to last pod of every row holding at least two pods.
+  for (let row = 0; row < podRows; row += 1) {
+    const rowStart = row * podColumns
+    const podsInRow = pods.slice(rowStart, rowStart + podColumns)
+    if (podsInRow.length >= 2) {
+      strokeSpine(podsInRow[0], podsInRow[podsInRow.length - 1], 2.4, 2.2)
+    }
+  }
+
+  // Vertical spines: same idea per column, drawn slightly thinner.
+  for (let column = 0; column < podColumns; column += 1) {
+    const podsInColumn = pods.filter((_, index) => index % podColumns === column)
+    if (podsInColumn.length >= 2) {
+      strokeSpine(podsInColumn[0], podsInColumn[podsInColumn.length - 1], 2.1, 2)
+    }
+  }
+
+  // Pod plates: a filled, outlined rectangle plus an optional inner outline.
+  const innerOutlineAlpha = mix(0.02, 0.08, lod.weights.board)
+  const showInnerOutline = innerOutlineAlpha > 0.02
+  for (const pod of visiblePods) {
+    const podAlpha = podEmphasis(pod.id)
+    const [fillLow, fillHigh] = pod.active ? [0.76, 0.88] : [0.66, 0.8]
+    const plateCorner = screenRadius(scale, 22, 1.2, 18)
+    graphics
+      .roundRect(pod.x, pod.y, pod.width, pod.height, plateCorner)
+      .fill({
+        color: 0x08131c,
+        alpha: mix(fillLow, fillHigh, lod.weights.board) * podAlpha,
+      })
+      .stroke({
+        color: pod.active ? 0xe6dbb1 : 0x5ecfca,
+        alpha: (pod.active ? 0.34 : 0.14) * podAlpha,
+        width: screenStroke(scale, pod.active ? 1.3 : 0.8, 0.08, 1),
+      })
+    if (!showInnerOutline) {
+      continue
+    }
+    const innerCorner = screenRadius(scale, 18, 0.8, 14)
+    graphics
+      .roundRect(pod.x + 8, pod.y + 8, pod.width - 16, pod.height - 16, innerCorner)
+      .stroke({
+        color: 0x6fd9cd,
+        alpha: innerOutlineAlpha * podAlpha,
+        width: screenStroke(scale, 0.45, 0.04, 0.5),
+      })
+  }
+}
+function TopologyScene({
+  model,
+  viewport,
+  surfaceSize,
+  hoveredTarget,
+  pinnedTarget,
+  linkedFocus,
+  linkedGpuIds,
+  linkedNodeIds,
+  linkedPodIds,
+  debugEnabled,
+  snapshotMode,
+  debugToggles,
+  onFpsChange,
+}: {
+  model: TopologySceneModel
+  viewport: ViewportState
+  surfaceSize: { width: number; height: number }
+  hoveredTarget: HoverTarget | null
+  pinnedTarget: HoverTarget | null
+  linkedFocus: LinkedFocus | null
+  linkedGpuIds: Set<string>
+  linkedNodeIds: Set<string>
+  linkedPodIds: Set<string>
+  debugEnabled: boolean
+  snapshotMode: boolean
+  debugToggles: DebugToggles
+  onFpsChange: (value: number) => void
+}) {
+  useExtend({ Container, Graphics, Text })
+  const { app } = useApplication()
+  const dynamicRef = useRef<PixiGraphics | null>(null)
+  const hoverRef = useRef<HoverTarget | null>(hoveredTarget)
+  const pinnedRef = useRef<HoverTarget | null>(pinnedTarget)
+  const statsRef = useRef({ elapsed: 0, frames: 0 })
+  const allGpus = useMemo(
+    () => model.nodes.flatMap((node) => node.gpus),
+    [model.nodes],
+  )
+  const gpuById = useMemo(() => new Map(allGpus.map((gpu) => [gpu.id, gpu])), [allGpus])
+  const nodeById = useMemo(() => new Map(model.nodes.map((node) => [node.id, node])), [model.nodes])
+  const podById = useMemo(() => new Map(model.pods.map((pod) => [pod.id, pod])), [model.pods])
+  const lodState = useMemo(() => getTopologyLodState(viewport.scale), [viewport.scale])
+  const worldViewportBounds = useMemo(
+    () =>
+      getWorldViewportBounds(
+        viewport,
+        surfaceSize.width,
+        surfaceSize.height,
+        screenWorld(viewport.scale, 180, 12, 240),
+      ),
+    [surfaceSize.height, surfaceSize.width, viewport],
+  )
+  const visiblePods = useMemo(
+    () => model.pods.filter((pod) => rectsIntersect(pod.hitBounds, worldViewportBounds)),
+    [model.pods, worldViewportBounds],
+  )
+  const visibleNodes = useMemo(
+    () => model.nodes.filter((node) => rectsIntersect(node.hitBounds, worldViewportBounds)),
+    [model.nodes, worldViewportBounds],
+  )
+  const visibleGpus = useMemo(
+    () => visibleNodes.flatMap((node) => node.gpus),
+    [visibleNodes],
+  )
+  const visibleLinks = useMemo(
+    () => ({
+      row: model.rowLinks.filter((link) =>
+        rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
+      ),
+      column: model.columnLinks.filter((link) =>
+        rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
+      ),
+      bus: model.busLinks.filter((link) =>
+        rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
+      ),
+    }),
+    [model.busLinks, model.columnLinks, model.rowLinks, worldViewportBounds],
+  )
+  const visibleLinkCount = useMemo(
+    () => visibleLinks.row.length + visibleLinks.column.length + visibleLinks.bus.length,
+    [visibleLinks.bus.length, visibleLinks.column.length, visibleLinks.row.length],
+  )
+  useEffect(() => {
+    hoverRef.current = hoveredTarget
+  }, [hoveredTarget])
+  useEffect(() => {
+    pinnedRef.current = pinnedTarget
+  }, [pinnedTarget])
+  useEffect(() => {
+    if (debugEnabled || snapshotMode) {
+      window.__PIXI_TOPOLOGY_APP__ = app
+      return () => {
+        delete window.__PIXI_TOPOLOGY_APP__
+      }
+    }
+    return undefined
+  }, [app, debugEnabled, snapshotMode])
+  const getEmphasis = useCallback(
+    (kind: 'pod' | 'node' | 'gpu', id: string) => {
+      const focusTarget = pinnedRef.current ?? hoverRef.current
+      const base = 1
+      if (!focusTarget || lodState.deepIsolation <= 0.001) {
+        return base
+      }
+      const fadeTo = mix(1, 0.08, lodState.deepIsolation)
+      if (kind === 'gpu') {
+        if (focusTarget.kind === 'gpu') {
+          const gpu = gpuById.get(id)
+          const focusedGpu = gpuById.get(focusTarget.id)
+          if (!gpu || !focusedGpu) {
+            return fadeTo
+          }
+          if (gpu.id === focusedGpu.id) {
+            return 1
+          }
+          if (gpu.nodeId === focusedGpu.nodeId) {
+            return mix(1, 0.34, lodState.deepIsolation)
+          }
+          if (gpu.domainIndex === focusedGpu.domainIndex) {
+            return mix(1, 0.16, lodState.deepIsolation)
+          }
+          return fadeTo
+        }
+        if (focusTarget.kind === 'node') {
+          const gpu = gpuById.get(id)
+          const focusedNode = nodeById.get(focusTarget.id)
+          if (!gpu || !focusedNode) {
+            return fadeTo
+          }
+          if (gpu.nodeId === focusedNode.id) {
+            return mix(1, 0.9, lodState.deepIsolation * 0.2)
+          }
+          if (gpu.domainIndex === focusedNode.domainIndex) {
+            return mix(1, 0.18, lodState.deepIsolation)
+          }
+          return fadeTo
+        }
+        const gpu = gpuById.get(id)
+        const focusedPod = podById.get(focusTarget.id)
+        if (!gpu || !focusedPod) {
+          return fadeTo
+        }
+        return gpu.domainIndex === focusedPod.index ? mix(1, 0.72, lodState.deepIsolation * 0.3) : fadeTo
+      }
+      if (kind === 'node') {
+        const node = nodeById.get(id)
+        if (!node) {
+          return fadeTo
+        }
+        if (focusTarget.kind === 'gpu') {
+          const gpu = gpuById.get(focusTarget.id)
+          if (!gpu) {
+            return fadeTo
+          }
+          if (node.id === gpu.nodeId) {
+            return mix(1, 0.5, lodState.deepIsolation)
+          }
+          if (node.domainIndex === gpu.domainIndex) {
+            return mix(1, 0.18, lodState.deepIsolation)
+          }
+          return fadeTo
+        }
+        if (focusTarget.kind === 'node') {
+          const focusedNode = nodeById.get(focusTarget.id)
+          if (!focusedNode) {
+            return fadeTo
+          }
+          if (node.id === focusedNode.id) {
+            return 1
+          }
+          if (node.domainIndex === focusedNode.domainIndex) {
+            return mix(1, 0.2, lodState.deepIsolation)
+          }
+          return fadeTo
+        }
+        const focusedPod = podById.get(focusTarget.id)
+        if (!focusedPod) {
+          return fadeTo
+        }
+        return node.domainIndex === focusedPod.index ? mix(1, 0.3, lodState.deepIsolation) : fadeTo
+      }
+      const pod = podById.get(id)
+      if (!pod) {
+        return fadeTo
+      }
+      if (focusTarget.kind === 'gpu') {
+        const gpu = gpuById.get(focusTarget.id)
+        return gpu && gpu.domainIndex === pod.index ? mix(1, 0.25, lodState.deepIsolation) : fadeTo
+      }
+      if (focusTarget.kind === 'node') {
+        const node = nodeById.get(focusTarget.id)
+        return node && node.domainIndex === pod.index ? mix(1, 0.32, lodState.deepIsolation) : fadeTo
+      }
+      return focusTarget.id === id ? 1 : fadeTo
+    },
+    [gpuById, lodState.deepIsolation, nodeById, podById],
+  )
+  const drawStatic = useCallback(
+    (graphics: PixiGraphics) => {
+      graphics.clear()
+      drawCampusPods(graphics, model, viewport.scale, lodState, visiblePods, (podId) =>
+        getEmphasis('pod', podId),
+      )
+      const localStructurePresence = Math.max(
+        lodState.weights.board,
+        lodState.weights.package * 0.9,
+        lodState.weights.silicon * 0.7,
+        lodState.weights.micro * 0.45,
+      )
+      const connectorAlpha = 0.18 * localStructurePresence
+      const linkPresence = mix(lodState.weights.overview * 0.35, 1, lodState.weights.board)
+      const hubRadius = screenWorld(viewport.scale, 6, 0.1, 2.4)
+      const drawStaticLink = (link: (typeof model.rowLinks)[number]) => {
+        const isRackScope = link.scope === 'rack'
+        if (!isRackScope && localStructurePresence < 0.08) {
+          return
+        }
+        const rackFrom =
+          isRackScope
+            ? model.pods.find((pod) => pod.centerX === link.x1 && pod.centerY === link.y1)
+            : null
+        const rackTo =
+          isRackScope
+            ? model.pods.find((pod) => pod.centerX === link.x2 && pod.centerY === link.y2)
+            : null
+        const emphasis =
+          isRackScope
+            ? Math.min(
+                rackFrom ? getEmphasis('pod', rackFrom.id) : 1,
+                rackTo ? getEmphasis('pod', rackTo.id) : 1,
+              )
+            : 1
+        graphics
+          .moveTo(link.x1, link.y1)
+          .lineTo(link.x2, link.y2)
+          .stroke({
+            color: link.color,
+            alpha:
+              (isRackScope
+                ? 0.08 + link.load * 0.24
+                : (0.04 + link.load * 0.12) * localStructurePresence) *
+              linkPresence *
+              emphasis,
+            width: screenStroke(
+              viewport.scale,
+              isRackScope ? 1.6 + link.load * 2 : 0.75 + link.load * 0.9,
+              0.05,
+              2.2,
+            ),
+          })
+      }
+      visibleLinks.row.forEach(drawStaticLink)
+      visibleLinks.column.forEach(drawStaticLink)
+      visibleLinks.bus.forEach((link) => {
+        if (localStructurePresence < 0.12) {
+          return
+        }
+        graphics
+          .moveTo(link.x1, link.y1)
+          .lineTo(link.x2, link.y2)
+          .stroke({
+            color: link.color,
+            alpha: (0.05 + link.load * 0.16) * linkPresence * localStructurePresence,
+            width: screenStroke(viewport.scale, 0.55 + link.load * 0.55, 0.05, 1.1),
+          })
+      })
+      for (const node of visibleNodes) {
+        const nodeEmphasis = getEmphasis('node', node.id)
+        if (localStructurePresence >= 0.08) {
+          drawNodeShell(
+            graphics,
+            node,
+            viewport.scale,
+            linkedNodeIds.has(node.id),
+            debugToggles.heat,
+            lodState,
+            nodeEmphasis,
+          )
+          graphics.circle(node.hubX, node.hubY, hubRadius).fill({
+            color: linkedNodeIds.has(node.id) ? 0xffcf7a : 0x89f8ea,
+            alpha:
+              ((linkedNodeIds.has(node.id) ? 0.68 : 0.08 + node.interNodeLoad * 0.22) *
+                nodeEmphasis *
+                localStructurePresence),
+          })
+        }
+        for (const gpu of node.gpus) {
+          const gpuEmphasis = getEmphasis('gpu', gpu.id)
+          const gpuCenterX = gpu.x + gpu.width / 2
+          const connectorStartY =
+            gpu.y + gpu.height / 2 <= node.hubY ? gpu.y + gpu.height : gpu.y
+          const connectorEndY =
+            gpu.y + gpu.height / 2 <= node.hubY ? node.hubY - 4 : node.hubY + 4
+          if (localStructurePresence >= 0.08) {
+            graphics
+              .moveTo(gpuCenterX, connectorStartY)
+              .lineTo(gpuCenterX, connectorEndY)
+              .stroke({
+                color: linkedGpuIds.has(gpu.id) ? 0xffd28a : 0x88efe0,
+                alpha:
+                  (linkedGpuIds.has(gpu.id)
+                    ? 0.72
+                    : connectorAlpha * (gpu.active ? 0.38 + gpu.linkLoad * 0.34 : 0.12)) * gpuEmphasis,
+                width: screenStroke(
+                  viewport.scale,
+                  linkedGpuIds.has(gpu.id) ? 1 : gpu.active ? 0.55 + gpu.linkLoad * 0.4 : 0.28,
+                  0.03,
+                  0.8,
+                ),
+              })
+          }
+          drawModule(graphics, gpu, viewport.scale, linkedGpuIds.has(gpu.id), lodState, gpuEmphasis)
+        }
+      }
+      if (debugToggles.bounds) {
+        for (const pod of visiblePods) {
+          graphics.roundRect(
+            pod.hitBounds.x,
+            pod.hitBounds.y,
+            pod.hitBounds.width,
+            pod.hitBounds.height,
+            screenRadius(viewport.scale, 24, 1.2, 16),
+          ).stroke({
+            color: 0xfde6ab,
+            alpha: 0.18,
+            width: screenStroke(viewport.scale, 1, 0.06, 1),
+          })
+        }
+        for (const node of visibleNodes) {
+          graphics.roundRect(
+            node.hitBounds.x,
+            node.hitBounds.y,
+            node.hitBounds.width,
+            node.hitBounds.height,
+            screenRadius(viewport.scale, 12, 0.6, 8),
+          ).stroke({
+            color: 0xfdf4cc,
+            alpha: 0.34,
+            width: screenStroke(viewport.scale, 1, 0.06, 1),
+          })
+          for (const gpu of node.gpus) {
+            graphics.roundRect(
+              gpu.hitBounds.x,
+              gpu.hitBounds.y,
+              gpu.hitBounds.width,
+              gpu.hitBounds.height,
+              screenRadius(viewport.scale, 6, 0.4, 4),
+            ).stroke({
+              color: 0x7adfff,
+              alpha: 0.24,
+              width: screenStroke(viewport.scale, 1, 0.06, 1),
+            })
+          }
+        }
+      }
+      if (debugToggles.hitAreas) {
+        for (const link of [...visibleLinks.row, ...visibleLinks.column, ...visibleLinks.bus]) {
+          graphics
+            .moveTo(link.x1, link.y1)
+            .lineTo(link.x2, link.y2)
+            .stroke({
+              color: link.kind === 'column' ? 0x60aaf7 : 0xffd08a,
+              alpha: 0.15,
+              width: screenStroke(viewport.scale, link.hitWidth, 0.5, 16),
+            })
+        }
+      }
+    },
+    [
+      debugToggles.bounds,
+      debugToggles.heat,
+      debugToggles.hitAreas,
+      getEmphasis,
+      linkedGpuIds,
+      linkedNodeIds,
+      lodState,
+      model,
+      viewport.scale,
+      visibleLinks.bus,
+      visibleLinks.column,
+      visibleLinks.row,
+      visibleNodes,
+      visiblePods,
+    ],
+  )
+  const redrawDynamic = useCallback(
+    (timeMs: number) => {
+      const graphics = dynamicRef.current
+      if (!graphics) {
+        return
+      }
+      graphics.clear()
+      const pulseTime = snapshotMode ? 0.42 : timeMs / 1000
+      const visibleTarget = pinnedRef.current ?? hoverRef.current
+      const linkGlowAlpha = mix(0.08, 0.18, lodState.weights.board)
+      const animateLinkGlow =
+        lodState.weights.board > 0.14 &&
+        visibleLinkCount < 900 &&
+        viewport.scale >= 0.28
+      const drawGlowLink = (link: (typeof model.rowLinks)[number], index: number, color: number) => {
+        const glow = pulse(timeMs, index * 0.19, snapshotMode ? 0 : 0.12)
+        graphics
+          .moveTo(link.x1, link.y1)
+          .lineTo(link.x2, link.y2)
+          .stroke({
+            color,
+            alpha: linkGlowAlpha * (0.12 + link.load * 0.5) * glow,
+            width: screenStroke(viewport.scale, 1.2 + link.load * 2.2, 0.08, 3.2),
+          })
+      }
+      if (animateLinkGlow) {
+        visibleLinks.row.forEach((link, index) => {
+          drawGlowLink(link, index, link.color)
+        })
+        visibleLinks.column.forEach((link, index) => {
+          drawGlowLink(link, index + visibleLinks.row.length, link.color)
+        })
+        visibleLinks.bus.forEach((link, index) => {
+          drawGlowLink(
+            link,
+            index + visibleLinks.row.length + visibleLinks.column.length,
+            0x9efef2,
+          )
+        })
+      }
+      if (linkedFocus) {
+        const wave = 0.58 + Math.sin(pulseTime * 2.4) * 0.18
+        for (const pod of visiblePods) {
+          if (!linkedPodIds.has(pod.id)) {
+            continue
+          }
+          drawCornerFocus(graphics, pod.focusFrame, viewport.scale, 0xffd78e, wave, 18, 4, 2)
+        }
+        for (const node of visibleNodes) {
+          if (!linkedNodeIds.has(node.id)) {
+            continue
+          }
+          drawCornerFocus(graphics, node.focusFrame, viewport.scale, 0xffd78e, wave, 9, 2, 1.1)
+        }
+        for (const gpu of visibleGpus) {
+          if (!linkedGpuIds.has(gpu.id)) {
+            continue
+          }
+          drawCornerFocus(graphics, gpu.focusFrame, viewport.scale, 0xffefc3, wave + 0.12, 6, 1, 1)
+        }
+      }
+      if (!visibleTarget) {
+        return
+      }
+      if (visibleTarget.kind === 'pod') {
+        const pod = podById.get(visibleTarget.id)
+        if (!pod) {
+          return
+        }
+        drawCornerFocus(graphics, pod.focusFrame, viewport.scale, 0xf9f5bc, 0.86, 22, 6, 2.3)
+        return
+      }
+      if (visibleTarget.kind === 'node') {
+        const node = nodeById.get(visibleTarget.id)
+        if (!node) {
+          return
+        }
+        drawCornerFocus(graphics, node.focusFrame, viewport.scale, 0xf9f5bc, 0.9, 10, 2, 1.7)
+        return
+      }
+      if (visibleTarget.kind === 'gpu') {
+        const gpu = gpuById.get(visibleTarget.id)
+        if (!gpu) {
+          return
+        }
+        drawCornerFocus(graphics, gpu.focusFrame, viewport.scale, 0xffffff, 0.96, 7, 1.5, 1.3)
+        return
+      }
+      const link = [...model.rowLinks, ...model.columnLinks, ...model.busLinks].find(
+        (item) => item.id === visibleTarget.id,
+      )
+      if (!link) {
+        return
+      }
+      graphics
+        .moveTo(link.x1, link.y1)
+        .lineTo(link.x2, link.y2)
+        .stroke({
+          color: 0xfef4c8,
+          alpha: 0.92,
+          width: screenStroke(viewport.scale, 2.6 + link.load * 2.8, 0.14, 4.2),
+        })
+    },
+    [
+      gpuById,
+      linkedFocus,
+      linkedGpuIds,
+      linkedNodeIds,
+      linkedPodIds,
+      lodState.weights.board,
+      model,
+      nodeById,
+      podById,
+      snapshotMode,
+      visibleGpus,
+      visibleLinkCount,
+      viewport.scale,
+      visibleLinks.bus,
+      visibleLinks.column,
+      visibleLinks.row,
+      visibleNodes,
+      visiblePods,
+    ],
+  )
+  useEffect(() => {
+    redrawDynamic(0)
+  }, [redrawDynamic, hoveredTarget, pinnedTarget, linkedFocus])
+  useTick(
+    useCallback(
+      (ticker: Ticker) => {
+        if (snapshotMode) {
+          return
+        }
+        const shouldAnimate =
+          linkedFocus != null ||
+          (lodState.weights.board > 0.14 &&
+            visibleLinkCount < 900 &&
+            viewport.scale >= 0.28)
+        if (!shouldAnimate) {
+          return
+        }
+        redrawDynamic(performance.now())
+        statsRef.current.elapsed += ticker.deltaMS
+        statsRef.current.frames += 1
+        if (statsRef.current.elapsed >= 500) {
+          const fps = (statsRef.current.frames * 1000) / statsRef.current.elapsed
+          onFpsChange(fps)
+          statsRef.current.elapsed = 0
+          statsRef.current.frames = 0
+        }
+      },
+      [
+        linkedFocus,
+        lodState.weights.board,
+        onFpsChange,
+        redrawDynamic,
+        snapshotMode,
+        viewport.scale,
+        visibleLinkCount,
+      ],
+    ),
+  )
+  const debugLabels = debugEnabled && debugToggles.ids
+  return (
+    <pixiContainer x={viewport.x} y={viewport.y} scale={viewport.scale}>
+      <pixiGraphics draw={drawStatic} />
+      <pixiGraphics ref={dynamicRef} draw={noopDraw} />
+      {debugLabels
+        ? visiblePods.map((pod) => (
+            <pixiText
+              key={`pod-label-${pod.id}`}
+              x={pod.x + 30}
+              y={pod.y + 24}
+              text={pod.active ? 'ACTIVE RACK' : `R${pod.index + 1}`}
+              style={{
+                fill: 0xdff7f0,
+                fontSize: screenWorld(viewport.scale, 18, 3.5, 24) * lodState.textScale,
+                fontFamily: 'IBM Plex Mono',
+                letterSpacing: screenWorld(viewport.scale, 2, 0.2, 2),
+              }}
+            />
+          ))
+        : null}
+      {debugLabels
+        ? visibleNodes.map((node) => (
+            <pixiText
+              key={`node-label-${node.id}`}
+              x={node.x + 10}
+              y={node.y + 8}
+              text={`N${node.index + 1}`}
+              style={{
+                fill: 0xdff7f0,
+                fontSize: screenWorld(viewport.scale, 8, 2, 10) * lodState.textScale,
+                fontFamily: 'IBM Plex Mono',
+              }}
+            />
+          ))
+        : null}
+    </pixiContainer>
+  )
+}
+/**
+ * Interactive topology map.
+ *
+ * Renders `TopologyScene` inside a `PixiSurface` and stacks a DOM interaction
+ * layer on top of it. The interaction layer owns all pointer/wheel handling
+ * (pan, wheel zoom, two-pointer pinch zoom, hover, click-to-pin, double-click
+ * reset) and hosts the inspector and optional debug panels.
+ *
+ * Props:
+ * - `viewModel`: input passed to `buildTopologySceneModel` and `describeTarget`.
+ * - `debugEnabled`: shows the debug panel and publishes `window.__TOPOLOGY_DEBUG__`.
+ * - `snapshotMode`: also shows the debug panel / debug global; the FPS readout
+ *   is replaced by the literal text "snapshot".
+ * - `linkedFocus`: optional external focus; GPUs/nodes/pods matching it (via
+ *   `matchesLinkedFocus`) are forwarded to the scene, and its `label` is shown
+ *   in the inspector.
+ */
+export function ClusterMap({
+  viewModel,
+  debugEnabled,
+  snapshotMode,
+  linkedFocus,
+}: ClusterMapProps) {
+  const model = useMemo(() => buildTopologySceneModel(viewModel), [viewModel])
+  const [viewport, setViewport] = useState<ViewportState>({ x: 0, y: 0, scale: 1 })
+  const [surfaceSize, setSurfaceSize] = useState({ width: 0, height: 0 })
+  const [sceneReady, setSceneReady] = useState(false)
+  const [hoveredTarget, setHoveredTarget] = useState<HoverTarget | null>(null)
+  const [pinnedTarget, setPinnedTarget] = useState<HoverTarget | null>(null)
+  const [isDragging, setIsDragging] = useState(false)
+  const [fps, setFps] = useState(0)
+  const [debugToggles, setDebugToggles] = useState<DebugToggles>({
+    bounds: false,
+    ids: false,
+    heat: false,
+    hitAreas: false,
+    stats: true,
+  })
+  const surfaceRef = useRef<HTMLDivElement | null>(null)
+  const interactionLayerRef = useRef<HTMLDivElement | null>(null)
+  // Mirrors the last size accepted by handleSurfaceSizeChange so duplicate
+  // size notifications can be ignored without reading React state.
+  const lastSurfaceSizeRef = useRef({ width: 0, height: 0 })
+  // Mutable gesture state; kept in a ref so pointer moves do not re-render.
+  const interactionRef = useRef({
+    dragging: false,
+    moved: false,
+    distance: 0,
+    lastPointer: null as ScenePointer | null,
+    pointers: new Map<number, ScenePointer>(),
+    pinchDistance: 0,
+    pinchMidpoint: null as ScenePointer | null,
+  })
+  // Ids of every GPU that matches the linked focus.
+  const linkedGpuIds = useMemo(() => {
+    return new Set(
+      model.nodes
+        .flatMap((node) => node.gpus)
+        .filter((gpu) => matchesLinkedFocus(gpu, linkedFocus))
+        .map((gpu) => gpu.id),
+    )
+  }, [linkedFocus, model.nodes])
+  // Ids of every node that contains at least one matching GPU.
+  const linkedNodeIds = useMemo(() => {
+    return new Set(
+      model.nodes
+        .filter((node) => node.gpus.some((gpu) => matchesLinkedFocus(gpu, linkedFocus)))
+        .map((node) => node.id),
+    )
+  }, [linkedFocus, model.nodes])
+  // Pod ids (`pod-<domainIndex>`) of nodes that contain a matching GPU.
+  const linkedPodIds = useMemo(() => {
+    if (!linkedFocus) {
+      return new Set<string>()
+    }
+    return new Set(
+      model.nodes
+        .filter((node) => node.gpus.some((gpu) => matchesLinkedFocus(gpu, linkedFocus)))
+        .map((node) => `pod-${node.domainIndex}`),
+    )
+  }, [linkedFocus, model.nodes])
+  // Whenever the model or the measured surface size changes, apply the fit
+  // viewport on the next animation frame and mark the scene ready one frame
+  // after that.
+  useEffect(() => {
+    if (surfaceSize.width === 0 || surfaceSize.height === 0) {
+      return
+    }
+    let settleFrame = 0
+    const frame = requestAnimationFrame(() => {
+      setViewport(getFitViewport(model, surfaceSize.width, surfaceSize.height))
+      settleFrame = requestAnimationFrame(() => {
+        setSceneReady(true)
+      })
+    })
+    return () => {
+      cancelAnimationFrame(frame)
+      cancelAnimationFrame(settleFrame)
+    }
+  }, [model, surfaceSize.height, surfaceSize.width])
+  // The pinned target takes precedence over the hovered one in the inspector.
+  const focusedDetails = useMemo<TargetDetails | null>(() => {
+    return describeTarget(model, viewModel, pinnedTarget ?? hoveredTarget)
+  }, [hoveredTarget, model, pinnedTarget, viewModel])
+  const debugObjects = useMemo(
+    () => createDebugObjectMap(model, viewport),
+    [model, viewport],
+  )
+  const detailLevel = useMemo(() => getTopologyLodState(viewport.scale).primaryBand, [viewport.scale])
+  const viewportConstraints = useMemo(() => {
+    if (surfaceSize.width === 0 || surfaceSize.height === 0) {
+      return null
+    }
+    return getViewportConstraints(model, surfaceSize.width, surfaceSize.height, viewport.scale)
+  }, [model, surfaceSize.height, surfaceSize.width, viewport.scale])
+  // Publish (or remove) the `window.__TOPOLOGY_DEBUG__` handle. It is only
+  // present while debug or snapshot mode is active.
+  useEffect(() => {
+    if (!(debugEnabled || snapshotMode)) {
+      delete window.__TOPOLOGY_DEBUG__
+      return
+    }
+    window.__TOPOLOGY_DEBUG__ = {
+      ready: sceneReady,
+      viewport,
+      surfaceSize,
+      objectCounts: model.objectCounts,
+      objects: debugObjects,
+      hoveredTarget,
+      pinnedTarget,
+      detailLevel,
+      setViewport: (nextViewport: ViewportState) => {
+        setViewport(clampViewportToScene(nextViewport, model, surfaceSize.width, surfaceSize.height))
+      },
+    }
+    return () => {
+      delete window.__TOPOLOGY_DEBUG__
+    }
+  }, [
+    debugEnabled,
+    debugObjects,
+    hoveredTarget,
+    model.objectCounts,
+    pinnedTarget,
+    sceneReady,
+    snapshotMode,
+    surfaceSize,
+    detailLevel,
+    model,
+    viewport,
+  ])
+  // Converts client (page) coordinates into coordinates relative to the
+  // interaction layer's bounding box; null while the layer is unmounted.
+  const scenePointerFromClient = useCallback((clientX: number, clientY: number) => {
+    const bounds = interactionLayerRef.current?.getBoundingClientRect()
+    if (!bounds) {
+      return null
+    }
+    return {
+      x: clientX - bounds.left,
+      y: clientY - bounds.top,
+    }
+  }, [])
+  const scenePointerFromEvent = useCallback(
+    (event: Pick<ReactPointerEvent<HTMLDivElement>, 'clientX' | 'clientY'>) =>
+      scenePointerFromClient(event.clientX, event.clientY),
+    [scenePointerFromClient],
+  )
+  // Inverts the viewport transform: layer-relative point -> world point.
+  const toWorldPoint = useCallback(
+    (pointer: ScenePointer) => ({
+      x: (pointer.x - viewport.x) / viewport.scale,
+      y: (pointer.y - viewport.y) / viewport.scale,
+    }),
+    [viewport],
+  )
+  // setViewport wrapper that always passes the result through
+  // clampViewportToScene; accepts a value or an updater like setState.
+  const setViewportClamped = useCallback(
+    (updater: ViewportState | ((current: ViewportState) => ViewportState)) => {
+      setViewport((current) => {
+        const nextViewport =
+          typeof updater === 'function'
+            ? (updater as (current: ViewportState) => ViewportState)(current)
+            : updater
+        return clampViewportToScene(nextViewport, model, surfaceSize.width, surfaceSize.height)
+      })
+    },
+    [model, surfaceSize.height, surfaceSize.width],
+  )
+  // Scales the viewport by `zoomFactor` while keeping the world point under
+  // `screenPoint` fixed on screen.
+  const applyZoomAtPointer = useCallback((screenPoint: ScenePointer, zoomFactor: number) => {
+    setViewportClamped((current) => {
+      const nextScale = clamp(
+        current.scale * zoomFactor,
+        viewportConstraints?.minScale ?? MIN_SCALE,
+        viewportConstraints?.maxScale ?? MAX_SCALE,
+      )
+      const worldX = (screenPoint.x - current.x) / current.scale
+      const worldY = (screenPoint.y - current.y) / current.scale
+      return {
+        scale: nextScale,
+        x: screenPoint.x - worldX * nextScale,
+        y: screenPoint.y - worldY * nextScale,
+      }
+    })
+  }, [setViewportClamped, viewportConstraints?.maxScale, viewportConstraints?.minScale])
+  // Recomputes the hovered target for a pointer position. The state is only
+  // replaced when kind or id actually changes, to avoid needless re-renders.
+  const updateHoverFromPointer = useCallback(
+    (pointer: ScenePointer | null) => {
+      if (!pointer) {
+        setHoveredTarget((current) => (current === null ? current : null))
+        return
+      }
+      const worldPoint = toWorldPoint(pointer)
+      const next = findHoverTarget(model, worldPoint.x, worldPoint.y)
+      setHoveredTarget((current) => {
+        if (current?.kind === next?.kind && current?.id === next?.id) {
+          return current
+        }
+        return next
+      })
+    },
+    [model, toWorldPoint],
+  )
+  const resetViewport = useCallback(() => {
+    if (surfaceSize.width === 0 || surfaceSize.height === 0) {
+      return
+    }
+    setViewport(getFitViewport(model, surfaceSize.width, surfaceSize.height))
+  }, [model, surfaceSize.height, surfaceSize.width])
+  // Size callback for PixiSurface. A notification that repeats the current
+  // size is ignored entirely: the fit effect is keyed on the size, so clearing
+  // `sceneReady` without a size change would leave it stuck at false.
+  const handleSurfaceSizeChange = useCallback((width: number, height: number) => {
+    const last = lastSurfaceSizeRef.current
+    if (last.width === width && last.height === height) {
+      return
+    }
+    lastSurfaceSizeRef.current = { width, height }
+    setSurfaceSize({ width, height })
+    setSceneReady(false)
+  }, [])
+  // Wheel zoom is attached natively with `passive: false` so that
+  // preventDefault() is honoured and the page does not scroll/zoom.
+  useEffect(() => {
+    const element = interactionLayerRef.current
+    if (!element) {
+      return
+    }
+    const handleWheel = (event: WheelEvent) => {
+      // Let the inspector and debug panels handle their own wheel events.
+      if (event.target instanceof Element && event.target.closest('.scene-inspector, .scene-debug-panel')) {
+        return
+      }
+      const pointer = scenePointerFromClient(event.clientX, event.clientY)
+      if (!pointer) {
+        return
+      }
+      event.preventDefault()
+      event.stopPropagation()
+      const delta = event.ctrlKey ? event.deltaY * 1.8 : event.deltaY
+      const zoomFactor = Math.exp(-delta * 0.0015)
+      applyZoomAtPointer(pointer, zoomFactor)
+    }
+    element.addEventListener('wheel', handleWheel, { passive: false })
+    return () => {
+      element.removeEventListener('wheel', handleWheel)
+    }
+  }, [applyZoomAtPointer, scenePointerFromClient])
+  // Click behaviour: pin the target under the pointer, unpin it if it is
+  // already pinned, and clear the pin for empty space or links.
+  const togglePinnedTarget = useCallback(
+    (pointer: ScenePointer) => {
+      const worldPoint = toWorldPoint(pointer)
+      const target = findHoverTarget(model, worldPoint.x, worldPoint.y)
+      if (!target || target.kind === 'link') {
+        setPinnedTarget(null)
+        return
+      }
+      setPinnedTarget((current) => {
+        if (current?.kind === target.kind && current.id === target.id) {
+          return null
+        }
+        return target
+      })
+    },
+    [model, toWorldPoint],
+  )
+  const handlePointerDown = useCallback(
+    (event: ReactPointerEvent<HTMLDivElement>) => {
+      // Ignore presses that start on child panels (inspector / debug).
+      if (event.target !== event.currentTarget) {
+        return
+      }
+      const pointer = scenePointerFromEvent(event)
+      if (!pointer) {
+        return
+      }
+      const interaction = interactionRef.current
+      interaction.pointers.set(event.pointerId, pointer)
+      interaction.lastPointer = pointer
+      interaction.moved = false
+      interaction.distance = 0
+      if (interaction.pointers.size === 1) {
+        interaction.dragging = true
+        setIsDragging(true)
+      } else if (interaction.pointers.size === 2) {
+        // Second pointer: switch from drag to pinch and seed the baseline.
+        const [first, second] = Array.from(interaction.pointers.values())
+        const deltaX = second.x - first.x
+        const deltaY = second.y - first.y
+        interaction.dragging = false
+        interaction.pinchDistance = Math.hypot(deltaX, deltaY)
+        interaction.pinchMidpoint = {
+          x: (first.x + second.x) / 2,
+          y: (first.y + second.y) / 2,
+        }
+        setIsDragging(false)
+      }
+      event.currentTarget.setPointerCapture(event.pointerId)
+    },
+    [scenePointerFromEvent],
+  )
+  const handlePointerMove = useCallback(
+    (event: ReactPointerEvent<HTMLDivElement>) => {
+      const pointer = scenePointerFromEvent(event)
+      if (!pointer) {
+        return
+      }
+      const interaction = interactionRef.current
+      if (interaction.pointers.has(event.pointerId)) {
+        interaction.pointers.set(event.pointerId, pointer)
+      }
+      if (interaction.pointers.size === 2) {
+        const [first, second] = Array.from(interaction.pointers.values())
+        const deltaX = second.x - first.x
+        const deltaY = second.y - first.y
+        const distance = Math.max(Math.hypot(deltaX, deltaY), 1)
+        const midpoint = {
+          x: (first.x + second.x) / 2,
+          y: (first.y + second.y) / 2,
+        }
+        // Snapshot the previous midpoint now. The state updater below may be
+        // executed later (during render), by which time the ref has already
+        // been overwritten with `midpoint` or cleared by a pointer release.
+        const previousMidpoint = interaction.pinchMidpoint
+        if (interaction.pinchDistance > 0 && previousMidpoint) {
+          const zoomFactor = distance / interaction.pinchDistance
+          setViewportClamped((current) => {
+            const nextScale = clamp(
+              current.scale * zoomFactor,
+              viewportConstraints?.minScale ?? MIN_SCALE,
+              viewportConstraints?.maxScale ?? MAX_SCALE,
+            )
+            const worldX = (midpoint.x - current.x) / current.scale
+            const worldY = (midpoint.y - current.y) / current.scale
+            // Zoom about the current midpoint, then add the midpoint's own
+            // movement so a two-finger drag also pans.
+            return {
+              scale: nextScale,
+              x:
+                midpoint.x -
+                worldX * nextScale +
+                (midpoint.x - previousMidpoint.x),
+              y:
+                midpoint.y -
+                worldY * nextScale +
+                (midpoint.y - previousMidpoint.y),
+            }
+          })
+        }
+        interaction.pinchDistance = distance
+        interaction.pinchMidpoint = midpoint
+        interaction.moved = true
+        return
+      }
+      if (interaction.dragging && interaction.lastPointer) {
+        const deltaMoveX = pointer.x - interaction.lastPointer.x
+        const deltaMoveY = pointer.y - interaction.lastPointer.y
+        interaction.lastPointer = pointer
+        // Accumulated travel distinguishes a drag from a click on release.
+        interaction.distance += Math.abs(deltaMoveX) + Math.abs(deltaMoveY)
+        if (interaction.distance > 2) {
+          interaction.moved = true
+        }
+        setViewportClamped((current) => ({
+          ...current,
+          x: current.x + deltaMoveX,
+          y: current.y + deltaMoveY,
+        }))
+        return
+      }
+      // Plain hover: only when the pointer is over the layer itself.
+      if (event.target !== event.currentTarget) {
+        return
+      }
+      updateHoverFromPointer(pointer)
+    },
+    [
+      scenePointerFromEvent,
+      setViewportClamped,
+      updateHoverFromPointer,
+      viewportConstraints?.maxScale,
+      viewportConstraints?.minScale,
+    ],
+  )
+  // Drops one pointer from the gesture. If a pointer remains (end of a
+  // pinch), panning continues with it.
+  const releasePointer = useCallback((pointerId: number) => {
+    const interaction = interactionRef.current
+    interaction.pointers.delete(pointerId)
+    if (interaction.pointers.size < 2) {
+      interaction.pinchDistance = 0
+      interaction.pinchMidpoint = null
+    }
+    if (interaction.pointers.size === 0) {
+      interaction.dragging = false
+      interaction.lastPointer = null
+      setIsDragging(false)
+      return
+    }
+    const remainingPointer = Array.from(interaction.pointers.values())[0]
+    interaction.lastPointer = remainingPointer
+    interaction.dragging = true
+    // Keep the `is-dragging` class in sync with the resumed drag; the pinch
+    // start cleared it.
+    setIsDragging(true)
+  }, [])
+  const handlePointerUp = useCallback(
+    (event: ReactPointerEvent<HTMLDivElement>) => {
+      const pointer = scenePointerFromEvent(event)
+      const interaction = interactionRef.current
+      // Must be evaluated before releasePointer mutates the pointer map.
+      const wasClick = !interaction.moved && interaction.distance < 8 && interaction.pointers.size <= 1
+      if (event.currentTarget.hasPointerCapture(event.pointerId)) {
+        event.currentTarget.releasePointerCapture(event.pointerId)
+      }
+      releasePointer(event.pointerId)
+      if (pointer) {
+        updateHoverFromPointer(pointer)
+      }
+      if (!pointer || !wasClick || event.target !== event.currentTarget) {
+        return
+      }
+      togglePinnedTarget(pointer)
+    },
+    [releasePointer, scenePointerFromEvent, togglePinnedTarget, updateHoverFromPointer],
+  )
+  // Shared by pointerleave and pointercancel: abandon any gesture in progress
+  // and clear the hover target.
+  const handlePointerLeave = useCallback(() => {
+    interactionRef.current.dragging = false
+    interactionRef.current.lastPointer = null
+    interactionRef.current.pointers.clear()
+    interactionRef.current.pinchDistance = 0
+    interactionRef.current.pinchMidpoint = null
+    setIsDragging(false)
+    setHoveredTarget(null)
+  }, [])
+  const toggleDebugFlag = (key: keyof DebugToggles) => {
+    setDebugToggles((current) => ({
+      ...current,
+      [key]: !current[key],
+    }))
+  }
+  const linkedSummary = linkedFocus ? linkedFocus.label : null
+  return (
+    <div className="topology-scene-shell">
+      <div className="scene-toolbar">
+        <div className="scene-toolbar-actions">
+          <button
+            type="button"
+            className="scene-button"
+            onClick={resetViewport}
+            data-testid="camera-reset"
+          >
+            reset camera
+          </button>
+        </div>
+      </div>
+      <div
+        ref={surfaceRef}
+        className="pixi-surface-wrap topology-surface-wrap"
+      >
+        <PixiSurface
+          className="pixi-surface"
+          canvasClassName="pixi-canvas"
+          testId="topology-scene"
+          onSizeChange={handleSurfaceSizeChange}
+        >
+          {() => (
+            <TopologyScene
+              model={model}
+              viewport={viewport}
+              surfaceSize={surfaceSize}
+              hoveredTarget={hoveredTarget}
+              pinnedTarget={pinnedTarget}
+              linkedFocus={linkedFocus}
+              linkedGpuIds={linkedGpuIds}
+              linkedNodeIds={linkedNodeIds}
+              linkedPodIds={linkedPodIds}
+              debugEnabled={debugEnabled}
+              snapshotMode={snapshotMode}
+              debugToggles={debugToggles}
+              onFpsChange={setFps}
+            />
+          )}
+        </PixiSurface>
+        <div
+          ref={interactionLayerRef}
+          className={`topology-interaction-layer${isDragging ? ' is-dragging' : ''}`}
+          data-testid="topology-interaction-layer"
+          onPointerDown={handlePointerDown}
+          onPointerMove={handlePointerMove}
+          onPointerUp={handlePointerUp}
+          onPointerCancel={handlePointerLeave}
+          onPointerLeave={handlePointerLeave}
+          onDoubleClick={(event) => {
+            if (event.target !== event.currentTarget) {
+              return
+            }
+            resetViewport()
+          }}
+        >
+          <div className="scene-inspector" data-testid="topology-inspector">
+            <p className="mini-label">
+              {pinnedTarget ? 'Pinned target' : hoveredTarget ? 'Hover target' : 'Topology inspector'}
+            </p>
+            {focusedDetails ? (
+              <>
+                <h3>{focusedDetails.heading}</h3>
+                <p className="inspector-subheading">{focusedDetails.subheading}</p>
+                {linkedSummary ? (
+                  <p className="inspector-link-note">Transformer highlight: {linkedSummary}</p>
+                ) : null}
+                <dl className="inspector-grid">
+                  {focusedDetails.metrics.map((metric) => (
+                    <div key={`${focusedDetails.id}-${metric.label}`}>
+                      <dt>{metric.label}</dt>
+                      <dd>{metric.value}</dd>
+                    </div>
+                  ))}
+                </dl>
+              </>
+            ) : (
+              <>
+                <h3>Inspect the cluster</h3>
+                <p className="inspector-subheading">
+                  Hover a rack or GPU to inspect placement, memory headroom, and link load.
+                  Pan and zoom to move between fabric and package detail.
+                </p>
+                {linkedSummary ? (
+                  <p className="inspector-link-note">Transformer highlight: {linkedSummary}</p>
+                ) : null}
+              </>
+            )}
+          </div>
+          {(debugEnabled || snapshotMode) && (
+            <div className="scene-debug-panel" data-testid="topology-debug">
+              <p className="mini-label">Debug overlay</p>
+              <div className="debug-toggle-grid">
+                <label>
+                  <input
+                    type="checkbox"
+                    checked={debugToggles.bounds}
+                    onChange={() => toggleDebugFlag('bounds')}
+                  />
+                  Bounds
+                </label>
+                <label>
+                  <input
+                    type="checkbox"
+                    checked={debugToggles.ids}
+                    onChange={() => toggleDebugFlag('ids')}
+                  />
+                  Node / GPU ids
+                </label>
+                <label>
+                  <input
+                    type="checkbox"
+                    checked={debugToggles.heat}
+                    onChange={() => toggleDebugFlag('heat')}
+                  />
+                  Load heat
+                </label>
+                <label>
+                  <input
+                    type="checkbox"
+                    checked={debugToggles.hitAreas}
+                    onChange={() => toggleDebugFlag('hitAreas')}
+                  />
+                  Link hit areas
+                </label>
+                <label>
+                  <input
+                    type="checkbox"
+                    checked={debugToggles.stats}
+                    onChange={() => toggleDebugFlag('stats')}
+                  />
+                  FPS / counts
+                </label>
+              </div>
+              {debugToggles.stats ? (
+                <div className="debug-stats">
+                  <span>FPS {snapshotMode ? 'snapshot' : fps.toFixed(0)}</span>
+                  <span>Racks {model.objectCounts.pods}</span>
+                  <span>Nodes {model.objectCounts.nodes}</span>
+                  <span>GPUs {model.objectCounts.gpus}</span>
+                  <span>Detail {detailLevel}</span>
+                  <span>Zoom {viewport.scale.toFixed(2)}x</span>
+                </div>
+              ) : null}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  )
+}

src/components/ControlsPanel.tsx ADDED Viewed

	@@ -0,0 +1,688 @@

+import {
+  applyGpuPreset,
+  applyExamplePhase,
+  applyExamplePreset,
+  getExamplePresetOptions,
+  getFactorOptions,
+  getGpuPresetId,
+  getGpuPresetOptions,
+  getPhaseOptions,
+  type WorkbenchConfig,
+} from '../lib/workbench'
+import { type WorkbenchViewModel } from '../lib/workbenchPresenter'
+/** Props for the top-level `ControlsPanel`. */
+type ControlsPanelProps = {
+  // Current workbench configuration being edited.
+  config: WorkbenchConfig
+  // Called with a complete replacement configuration after an edit.
+  onChange: (next: WorkbenchConfig) => void
+  // Bound to the panel's "reset" button.
+  onReset: () => void
+  // Read for summary values shown in the panel header.
+  viewModel: WorkbenchViewModel
+}
+/** Props for `OptionStrip`: a card with one chip button per numeric option. */
+type OptionStripProps = {
+  // Card heading; also used as the chip group's aria-label.
+  label: string
+  // Paragraph shown under the heading.
+  caption: string
+  // Selected option; rendered in the badge as `{value}x`.
+  value: number
+  // One chip is rendered per entry; entries double as React keys.
+  options: number[]
+  // Called with the option of the chip that was clicked.
+  onSelect: (value: number) => void
+}
+/** Props for `SelectField`: a labelled `<select>`. */
+type SelectFieldProps = {
+  label: string
+  // Value of the selected option.
+  value: string
+  // `value` is used as both the option value and its React key.
+  options: Array<{ value: string; label: string }>
+  // Called with the newly selected option value.
+  onChange: (value: string) => void
+}
+/** Props for `NumberField`: a labelled numeric `<input>`. */
+type NumberFieldProps = {
+  label: string
+  value: number
+  // Lower bound; reported values are never below it.
+  min: number
+  // Input step; `NumberField` defaults it to 1.
+  step?: number
+  // Called with the parsed, lower-bounded number.
+  onChange: (value: number) => void
+}
+/** Props for `ToggleField`: a labelled checkbox. */
+type ToggleFieldProps = {
+  label: string
+  checked: boolean
+  // Called with the checkbox's new checked state.
+  onChange: (checked: boolean) => void
+}
+/**
+ * Card with a heading, caption, an `{value}x` badge and a row of chip
+ * buttons, one per entry in `options`. The chip equal to `value` gets the
+ * `active` class; clicking a chip reports its option through `onSelect`.
+ */
+function OptionStrip({ label, caption, value, options, onSelect }: OptionStripProps) {
+  // Build the chips up front so the card markup below stays flat.
+  const chips = options.map((candidate) => {
+    const chipClassName = candidate === value ? 'option-chip active' : 'option-chip'
+    return (
+      <button
+        key={candidate}
+        type="button"
+        className={chipClassName}
+        onClick={() => onSelect(candidate)}
+      >
+        {candidate}
+      </button>
+    )
+  })
+  return (
+    <div className="control-card">
+      <div className="control-card-header">
+        <div>
+          <h3>{label}</h3>
+          <p>{caption}</p>
+        </div>
+        <span className="control-badge">{value}x</span>
+      </div>
+      <div className="option-strip" role="group" aria-label={label}>
+        {chips}
+      </div>
+    </div>
+  )
+}
+/**
+ * Labelled `<select>` bound to `value`. Each entry of `options` becomes one
+ * `<option>`; a change reports the selected option's value via `onChange`.
+ */
+function SelectField({ label, value, options, onChange }: SelectFieldProps) {
+  const optionElements = options.map(({ value: optionValue, label: optionLabel }) => (
+    <option key={optionValue} value={optionValue}>
+      {optionLabel}
+    </option>
+  ))
+  return (
+    <label className="control-field">
+      <span>{label}</span>
+      <select value={value} onChange={({ target }) => onChange(target.value)}>
+        {optionElements}
+      </select>
+    </label>
+  )
+}
+/**
+ * Labelled numeric `<input>`.
+ *
+ * On change the raw text is parsed and reported through `onChange`, never
+ * below `min`. Empty or non-finite input falls back to `min`. A typed `0` is
+ * treated as a real value (it is only raised to `min` when `min > 0`), so the
+ * field also behaves correctly for a negative `min`.
+ *
+ * @param label  Text shown next to the input.
+ * @param value  Current value of the controlled input.
+ * @param min    Lower bound, used for the `min` attribute and for clamping.
+ * @param step   Input step; defaults to 1.
+ * @param onChange Receives the parsed, lower-bounded number.
+ */
+function NumberField({ label, value, min, step = 1, onChange }: NumberFieldProps) {
+  return (
+    <label className="control-field">
+      <span>{label}</span>
+      <input
+        type="number"
+        min={min}
+        step={step}
+        value={value}
+        onChange={(event) => {
+          const raw = event.target.value
+          // Number('') is 0, so an empty field must be detected explicitly
+          // rather than through a falsy check that would also swallow a real 0.
+          const parsed = raw === '' ? Number.NaN : Number(raw)
+          onChange(Number.isFinite(parsed) ? Math.max(min, parsed) : min)
+        }}
+      />
+    </label>
+  )
+}
+/**
+ * Labelled checkbox bound to `checked`; a change reports the checkbox's new
+ * checked state through `onChange`.
+ */
+function ToggleField({ label, checked, onChange }: ToggleFieldProps) {
+  const checkbox = (
+    <input
+      type="checkbox"
+      checked={checked}
+      onChange={({ target }) => onChange(target.checked)}
+    />
+  )
+  return (
+    <label className="control-toggle">
+      {checkbox}
+      <span>{label}</span>
+    </label>
+  )
+}
+export function ControlsPanel({
+  config,
+  onChange,
+  onReset,
+  viewModel,
+}: ControlsPanelProps) {
+  const totalGPUs = config.cluster.gpusPerNode * config.cluster.numNodes
+  const factorOptions = {
+    tp: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.tp),
+    pp: getFactorOptions(totalGPUs, config.parallelism.pp),
+    cp: getFactorOptions(totalGPUs, config.parallelism.cp),
+    ep: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.ep),
+  }
+  const gpuPresetId = getGpuPresetId(config.cluster.gpuType)
+  const updateTraining = <K extends keyof WorkbenchConfig['training']>(
+    key: K,
+    value: WorkbenchConfig['training'][K],
+  ) => {
+    onChange({
+      ...config,
+      customized: true,
+      training: {
+        ...config.training,
+        [key]: value,
+      },
+    })
+  }
+  const updateModel = <K extends keyof WorkbenchConfig['model']>(
+    key: K,
+    value: WorkbenchConfig['model'][K],
+  ) => {
+    onChange({
+      ...config,
+      customized: true,
+      model: {
+        ...config.model,
+        [key]: value,
+      },
+    })
+  }
+  const updateCluster = <K extends keyof WorkbenchConfig['cluster']>(
+    key: K,
+    value: WorkbenchConfig['cluster'][K],
+  ) => {
+    onChange({
+      ...config,
+      customized: true,
+      cluster: {
+        ...config.cluster,
+        [key]: value,
+      },
+    })
+  }
+  const updateAttentionProfile = (
+    nextAttentionProfile: WorkbenchConfig['model']['attentionProfile'],
+  ) => {
+    updateModel('attentionProfile', nextAttentionProfile)
+  }
+  const updateMoe = (nextMoe: NonNullable<WorkbenchConfig['model']['moe']> | undefined) => {
+    updateModel('moe', nextMoe)
+  }
+  const updateParallelism = <K extends keyof WorkbenchConfig['parallelism']>(
+    key: K,
+    value: WorkbenchConfig['parallelism'][K],
+  ) => {
+    onChange({
+      ...config,
+      customized: true,
+      parallelism: {
+        ...config.parallelism,
+        [key]: value,
+      },
+    })
+  }
+  return (
+    <section className="controls-band">
+      <div className="controls-head">
+        <div>
+          <p className="mini-label">Controls</p>
+          <h2>Example / model / cluster / training / parallelism</h2>
+        </div>
+        <div className="controls-meta">
+          <span>{viewModel.analysis.totalGPUs.toLocaleString()} GPUs in cluster</span>
+          <span>
+            {config.parallelism.tp}×{config.parallelism.pp}×{config.parallelism.cp}×
+            {config.parallelism.ep} · DP {viewModel.analysis.derivedParallelism.dp}
+          </span>
+          <button type="button" className="reset-chip" onClick={onReset}>
+            reset
+          </button>
+        </div>
+      </div>
+      <div className="controls-stack">
+        <div className="control-card">
+          <div className="control-card-header">
+            <div>
+              <h3>Examples</h3>
+              <p>Built-in frontier runs as starting points for custom model and cluster edits</p>
+            </div>
+            {config.customized ? <span className="control-badge">customized</span> : null}
+          </div>
+          <div className="field-grid">
+            <SelectField
+              label="Example preset"
+              value={config.examplePresetId}
+              options={getExamplePresetOptions().map((option) => ({
+                value: option.id,
+                label: option.label,
+              }))}
+              onChange={(value) =>
+                onChange(applyExamplePreset(config, value as typeof config.examplePresetId))
+              }
+            />
+            <SelectField
+              label="Phase"
+              value={config.phaseId}
+              options={getPhaseOptions(config.examplePresetId).map((option) => ({
+                value: option.id,
+                label: option.label,
+              }))}
+              onChange={(value) => onChange(applyExamplePhase(config, value as typeof config.phaseId))}
+            />
+          </div>
+        </div>
+        <div className="control-card">
+          <div className="control-card-header">
+            <div>
+              <h3>Model</h3>
+              <p>Core architecture dimensions, attention layout, and optional MoE settings</p>
+            </div>
+          </div>
+          <div className="field-grid field-grid-wide">
+            <SelectField
+              label="Architecture"
+              value={config.model.architecture}
+              options={[
+                { value: 'dense', label: 'dense' },
+                { value: 'moe', label: 'moe' },
+              ]}
+              onChange={(value) => {
+                const architecture = value as WorkbenchConfig['model']['architecture']
+                onChange({
+                  ...config,
+                  customized: true,
+                  model: {
+                    ...config.model,
+                    architecture,
+                    moe:
+                      architecture === 'moe'
+                        ? (config.model.moe ?? {
+                            numExperts: 64,
+                            expertsPerToken: 2,
+                            numDenseLayers: Math.min(config.model.numLayers, 4),
+                            expertIntermediateSize: Math.max(1024, config.model.hiddenDim),
+                            activeParamsPerToken: undefined,
+                          })
+                        : undefined,
+                  },
+                })
+              }}
+            />
+            <NumberField
+              label="Hidden dim"
+              value={config.model.hiddenDim}
+              min={128}
+              step={128}
+              onChange={(value) => updateModel('hiddenDim', value)}
+            />
+            <NumberField
+              label="Layers"
+              value={config.model.numLayers}
+              min={1}
+              onChange={(value) => updateModel('numLayers', value)}
+            />
+            <NumberField
+              label="Attention heads"
+              value={config.model.numHeads}
+              min={1}
+              onChange={(value) => updateModel('numHeads', value)}
+            />
+            <NumberField
+              label="KV heads"
+              value={config.model.numKVHeads}
+              min={1}
+              onChange={(value) => updateModel('numKVHeads', value)}
+            />
+            <NumberField
+              label="Vocab size"
+              value={config.model.vocabSize}
+              min={256}
+              onChange={(value) => updateModel('vocabSize', value)}
+            />
+            <NumberField
+              label="Intermediate size"
+              value={config.model.intermediateSize}
+              min={256}
+              onChange={(value) => updateModel('intermediateSize', value)}
+            />
+            <SelectField
+              label="Attention profile"
+              value={config.model.attentionProfile?.type ?? 'full'}
+              options={[
+                { value: 'full', label: 'full attention' },
+                { value: 'hybrid', label: 'hybrid attention' },
+              ]}
+              onChange={(value) => {
+                if (value === 'full') {
+                  updateAttentionProfile({ type: 'full' })
+                  return
+                }
+                updateAttentionProfile({
+                  type: 'hybrid',
+                  slidingWindowSize: config.model.attentionProfile?.slidingWindowSize ?? 4096,
+                  globalAttentionFraction: config.model.attentionProfile?.globalAttentionFraction,
+                  globalAttentionEveryN: config.model.attentionProfile?.globalAttentionEveryN,
+                })
+              }}
+            />
+            <div className="control-field control-field-toggle">
+              <span>Tied embeddings</span>
+              <ToggleField
+                label="share output head"
+                checked={config.model.tiedEmbeddings}
+                onChange={(value) => updateModel('tiedEmbeddings', value)}
+              />
+            </div>
+            {config.model.attentionProfile?.type === 'hybrid' ? (
+              <>
+                <NumberField
+                  label="Sliding window"
+                  value={config.model.attentionProfile.slidingWindowSize ?? 4096}
+                  min={0}
+                  step={256}
+                  onChange={(value) =>
+                    updateAttentionProfile({
+                      ...config.model.attentionProfile,
+                      type: 'hybrid',
+                      slidingWindowSize: value > 0 ? value : undefined,
+                    })
+                  }
+                />
+                <NumberField
+                  label="Global attention fraction"
+                  value={config.model.attentionProfile.globalAttentionFraction ?? 0}
+                  min={0}
+                  step={0.05}
+                  onChange={(value) =>
+                    updateAttentionProfile({
+                      ...config.model.attentionProfile,
+                      type: 'hybrid',
+                      globalAttentionFraction: value > 0 ? value : undefined,
+                    })
+                  }
+                />
+                <NumberField
+                  label="Global every N"
+                  value={config.model.attentionProfile.globalAttentionEveryN ?? 0}
+                  min={0}
+                  onChange={(value) =>
+                    updateAttentionProfile({
+                      ...config.model.attentionProfile,
+                      type: 'hybrid',
+                      globalAttentionEveryN: value > 0 ? value : undefined,
+                    })
+                  }
+                />
+              </>
+            ) : null}
+            {config.model.architecture === 'moe' && config.model.moe ? (
+              <>
+                <NumberField
+                  label="Experts"
+                  value={config.model.moe.numExperts}
+                  min={1}
+                  onChange={(value) => updateMoe({ ...config.model.moe!, numExperts: value })}
+                />
+                <NumberField
+                  label="Experts per token"
+                  value={config.model.moe.expertsPerToken}
+                  min={1}
+                  onChange={(value) =>
+                    updateMoe({ ...config.model.moe!, expertsPerToken: value })
+                  }
+                />
+                <NumberField
+                  label="Dense layers"
+                  value={config.model.moe.numDenseLayers}
+                  min={0}
+                  onChange={(value) => updateMoe({ ...config.model.moe!, numDenseLayers: value })}
+                />
+                <NumberField
+                  label="Expert intermediate"
+                  value={config.model.moe.expertIntermediateSize}
+                  min={256}
+                  onChange={(value) =>
+                    updateMoe({ ...config.model.moe!, expertIntermediateSize: value })
+                  }
+                />
+                <NumberField
+                  label="Active params / token"
+                  value={config.model.moe.activeParamsPerToken ?? 0}
+                  min={0}
+                  step={1000000}
+                  onChange={(value) =>
+                    updateMoe({
+                      ...config.model.moe!,
+                      activeParamsPerToken: value > 0 ? value : undefined,
+                    })
+                  }
+                />
+              </>
+            ) : null}
+          </div>
+        </div>
+        <div className="control-card">
+          <div className="control-card-header">
+            <div>
+              <h3>Cluster</h3>
+              <p>GPU spec, node count, and rack-local topology for the physical fabric</p>
+            </div>
+          </div>
+          <div className="field-grid field-grid-wide">
+            <SelectField
+              label="GPU preset"
+              value={gpuPresetId}
+              options={[
+                ...getGpuPresetOptions().map((option) => ({
+                  value: option.id,
+                  label: option.label,
+                })),
+                { value: 'custom', label: 'custom GPU' },
+              ]}
+              onChange={(value) => {
+                if (value === 'custom') {
+                  return
+                }
+                onChange(applyGpuPreset(config, value as Parameters<typeof applyGpuPreset>[1]))
+              }}
+            />
+            <label className="control-field">
+              <span>GPU name</span>
+              <input
+                type="text"
+                value={config.cluster.gpuType.name}
+                onChange={(event) =>
+                  updateCluster('gpuType', {
+                    ...config.cluster.gpuType,
+                    name: event.target.value,
+                  })
+                }
+              />
+            </label>
+            <NumberField
+              label="HBM capacity (GB)"
+              value={config.cluster.gpuType.hbmCapacityGB}
+              min={1}
+              onChange={(value) =>
+                updateCluster('gpuType', {
+                  ...config.cluster.gpuType,
+                  hbmCapacityGB: value,
+                })
+              }
+            />
+            <NumberField
+              label="Peak BF16 TFLOPs"
+              value={config.cluster.gpuType.peakTFLOPsBF16}
+              min={1}
+              onChange={(value) =>
+                updateCluster('gpuType', {
+                  ...config.cluster.gpuType,
+                  peakTFLOPsBF16: value,
+                })
+              }
+            />
+            <NumberField
+              label="HBM bandwidth (TB/s)"
+              value={config.cluster.gpuType.memBandwidthTBs}
+              min={0.1}
+              step={0.05}
+              onChange={(value) =>
+                updateCluster('gpuType', {
+                  ...config.cluster.gpuType,
+                  memBandwidthTBs: value,
+                })
+              }
+            />
+            <NumberField
+              label="GPUs per node"
+              value={config.cluster.gpusPerNode}
+              min={1}
+              onChange={(value) => updateCluster('gpusPerNode', value)}
+            />
+            <NumberField
+              label="Nodes"
+              value={config.cluster.numNodes}
+              min={1}
+              onChange={(value) => updateCluster('numNodes', value)}
+            />
+            <NumberField
+              label="Nodes per rack"
+              value={config.cluster.nodesPerRack ?? config.cluster.numNodes}
+              min={1}
+              onChange={(value) => updateCluster('nodesPerRack', value)}
+            />
+            <NumberField
+              label="Intra-node bandwidth (GB/s)"
+              value={config.cluster.intraNodeBandwidthGBs}
+              min={1}
+              onChange={(value) => updateCluster('intraNodeBandwidthGBs', value)}
+            />
+            <NumberField
+              label="Inter-node bandwidth (GB/s)"
+              value={config.cluster.interNodeBandwidthGBs}
+              min={1}
+              onChange={(value) => updateCluster('interNodeBandwidthGBs', value)}
+            />
+          </div>
+        </div>
+        <div className="control-card">
+          <div className="control-card-header">
+            <div>
+              <h3>Training</h3>
+              <p>Batching, precision, optimizer state, and recompute</p>
+            </div>
+          </div>
+          <div className="field-grid field-grid-wide">
+            <NumberField
+              label="Micro-batch"
+              value={config.training.microBatchSize}
+              min={1}
+              onChange={(value) => updateTraining('microBatchSize', value)}
+            />
+            <NumberField
+              label="Seq length"
+              value={config.training.seqLength}
+              min={256}
+              step={256}
+              onChange={(value) => updateTraining('seqLength', value)}
+            />
+            <NumberField
+              label="Grad accum"
+              value={config.training.gradAccumSteps}
+              min={1}
+              onChange={(value) => updateTraining('gradAccumSteps', value)}
+            />
+            <SelectField
+              label="Precision"
+              value={config.training.precision}
+              options={[
+                { value: 'bf16', label: 'bf16' },
+                { value: 'fp16', label: 'fp16' },
+                { value: 'fp8', label: 'fp8' },
+                { value: 'fp32', label: 'fp32' },
+              ]}
+              onChange={(value) =>
+                updateTraining('precision', value as WorkbenchConfig['training']['precision'])
+              }
+            />
+            <SelectField
+              label="Optimizer"
+              value={config.training.optimizer}
+              options={[
+                { value: 'adamw', label: 'adamw' },
+                { value: 'adam', label: 'adam' },
+                { value: 'muon', label: 'muon' },
+                { value: 'sgd', label: 'sgd' },
+              ]}
+              onChange={(value) =>
+                updateTraining('optimizer', value as WorkbenchConfig['training']['optimizer'])
+              }
+            />
+            <div className="control-field control-field-toggle">
+              <span>Activation recompute</span>
+              <ToggleField
+                label="checkpointing enabled"
+                checked={config.training.activationCheckpointing}
+                onChange={(value) => updateTraining('activationCheckpointing', value)}
+              />
+            </div>
+          </div>
+        </div>
+        <div className="controls-grid controls-grid-parallelism">
+          <OptionStrip
+            label="TP"
+            caption="tensor shards"
+            value={config.parallelism.tp}
+            options={factorOptions.tp}
+            onSelect={(value) => updateParallelism('tp', value)}
+          />
+          <OptionStrip
+            label="PP"
+            caption="pipeline stages"
+            value={config.parallelism.pp}
+            options={factorOptions.pp}
+            onSelect={(value) => updateParallelism('pp', value)}
+          />
+          <OptionStrip
+            label="CP"
+            caption="context shards"
+            value={config.parallelism.cp}
+            options={factorOptions.cp}
+            onSelect={(value) => updateParallelism('cp', value)}
+          />
+          <OptionStrip
+            label="EP"
+            caption="expert lanes"
+            value={config.parallelism.ep}
+            options={factorOptions.ep}
+            onSelect={(value) => updateParallelism('ep', value)}
+          />
+          <OptionStrip
+            label="ZeRO"
+            caption="state sharding"
+            value={config.parallelism.zeroStage}
+            options={[0, 1, 2, 3]}
+            onSelect={(value) => updateParallelism('zeroStage', value as 0 | 1 | 2 | 3)}
+          />
+          <div className="control-card">
+            <div className="control-card-header">
+              <div>
+                <h3>Megatron-style derived DP</h3>
+                <p>DP is derived from cluster size and the other parallel axes</p>
+              </div>
+              <span className="control-badge">{viewModel.analysis.derivedParallelism.dp}x</span>
+            </div>
+            <div className="field-grid">
+              <NumberField
+                label="FSDP shard group"
+                value={config.parallelism.fsdpShardGroupSize}
+                min={0}
+                step={8}
+                onChange={(value) => updateParallelism('fsdpShardGroupSize', value)}
+              />
+              <div className="control-field control-field-toggle">
+                <span>Distributed optimizer</span>
+                <ToggleField
+                  label="optimizer state sharding"
+                  checked={config.parallelism.distributedOptimizer}
+                  onChange={(value) => updateParallelism('distributedOptimizer', value)}
+                />
+              </div>
+            </div>
+          </div>
+        </div>
+      </div>
+    </section>
+  )
+}

src/components/pixi/PixiSurface.tsx ADDED Viewed

	@@ -0,0 +1,50 @@

+import { Application } from '@pixi/react'
+import { useEffect, useRef, type ReactNode } from 'react'
+import { useElementSize } from '../../hooks/useElementSize'
+/**
+ * Props accepted by `PixiSurface`.
+ *
+ * - `className` and `testId` are applied to the wrapping `<div>` (the latter as `data-testid`).
+ * - `canvasClassName` is forwarded to the Pixi `<Application>` element.
+ * - `onSizeChange` is invoked from an effect with the measured size, but only when both
+ *   dimensions are non-zero.
+ * - `children` is a render function that receives the measured size object.
+ */
+type PixiSurfaceProps = {
+  className?: string
+  canvasClassName?: string
+  testId?: string
+  onSizeChange?: (width: number, height: number) => void
+  children: (size: { width: number; height: number }) => ReactNode
+}
+/**
+ * Hosts a Pixi `<Application>` inside a measured wrapper `<div>`.
+ *
+ * The wrapper is always rendered so that it can be measured; the application
+ * (and the `children` render function) is mounted only once the wrapper
+ * reports a width and height greater than zero.
+ */
+export function PixiSurface(props: PixiSurfaceProps) {
+  const { className, canvasClassName, testId, onSizeChange, children } = props
+  const hostRef = useRef<HTMLDivElement>(null)
+  const size = useElementSize(hostRef)
+  const hasArea = size.width > 0 && size.height > 0
+  // Report the measured size upwards; a zero width or height is never reported.
+  useEffect(() => {
+    if (onSizeChange && size.width !== 0 && size.height !== 0) {
+      onSizeChange(size.width, size.height)
+    }
+  }, [onSizeChange, size.height, size.width])
+  const surface = hasArea ? (
+    <Application
+      className={canvasClassName}
+      resizeTo={hostRef}
+      preference="webgl"
+      antialias
+      autoDensity
+      backgroundAlpha={0}
+      clearBeforeRender
+      sharedTicker={false}
+      resolution={1}
+    >
+      {children(size)}
+    </Application>
+  ) : null
+  return (
+    <div ref={hostRef} className={className} data-testid={testId}>
+      {surface}
+    </div>
+  )
+}

src/hooks/useElementSize.ts ADDED Viewed

	@@ -0,0 +1,48 @@

+import { useEffect, useState, type RefObject } from 'react'
+/** Width/height pair returned by `useElementSize`; both values are rounded to integers there. */
+type ElementSize = {
+  width: number
+  height: number
+}
+/**
+ * Tracks the rounded `contentRect` size of the element held by `ref`.
+ *
+ * The size starts at 0×0 and is updated from a `ResizeObserver`. The state
+ * object is replaced only when the rounded width or height actually changes,
+ * so unchanged measurements do not produce a new object. If `ref.current` is
+ * empty when the effect runs, nothing is observed.
+ *
+ * @param ref Ref to the element to measure.
+ * @returns The most recently observed `{ width, height }`.
+ */
+export function useElementSize<T extends HTMLElement>(
+  ref: RefObject<T | null>,
+): ElementSize {
+  const [size, setSize] = useState<ElementSize>({ width: 0, height: 0 })
+  useEffect(() => {
+    const target = ref.current
+    if (!target) {
+      return
+    }
+    const handleResize = ([firstEntry]: ResizeObserverEntry[]) => {
+      if (!firstEntry) {
+        return
+      }
+      const { width: rawWidth, height: rawHeight } = firstEntry.contentRect
+      const measured: ElementSize = {
+        width: Math.round(rawWidth),
+        height: Math.round(rawHeight),
+      }
+      // Keep the previous object when nothing changed so React can skip the update.
+      setSize((previous) =>
+        previous.width === measured.width && previous.height === measured.height
+          ? previous
+          : measured,
+      )
+    }
+    const observer = new ResizeObserver(handleResize)
+    observer.observe(target)
+    return () => {
+      observer.disconnect()
+    }
+  }, [ref])
+  return size
+}

src/index.css ADDED Viewed

	@@ -0,0 +1,56 @@

+/* Design tokens (font stacks and colour palette) plus the base text settings inherited by the page. */
+:root {
+  --font-sans: 'Space Grotesk', sans-serif;
+  --font-mono: 'IBM Plex Mono', monospace;
+  --ink-strong: #16262d;
+  --ink-soft: #526771;
+  --ink-muted: #7d8f98;
+  --accent-cool: #117a70;
+  --accent-warm: #d66225;
+  --panel-stroke: rgba(24, 64, 86, 0.08);
+  color: var(--ink-strong);
+  font-family: var(--font-sans);
+  font-synthesis: none;
+  font-weight: 400;
+  line-height: 1.5;
+  text-rendering: optimizeLegibility;
+  -moz-osx-font-smoothing: grayscale;
+  -webkit-font-smoothing: antialiased;
+}
+/* Size every element by its border box. */
+* {
+  box-sizing: border-box;
+}
+html {
+  min-width: 320px;
+  min-height: 100%;
+}
+/* Page background: two corner radial tints layered over a vertical gradient. */
+body {
+  min-width: 320px;
+  min-height: 100vh;
+  margin: 0;
+  background:
+    radial-gradient(circle at 0% 0%, rgba(17, 122, 112, 0.08), transparent 24%),
+    radial-gradient(circle at 100% 0%, rgba(214, 98, 37, 0.1), transparent 20%),
+    linear-gradient(180deg, #f3f0e8 0%, #efebe0 48%, #ebe6d9 100%);
+}
+/* Form controls do not inherit fonts by default; make them use the page font. */
+button,
+input,
+select,
+textarea {
+  font: inherit;
+}
+button {
+  cursor: pointer;
+}
+a {
+  color: inherit;
+}
+/* The React mount point fills at least the viewport height. */
+#root {
+  min-height: 100vh;
+}

src/lib/linkedFocus.ts ADDED Viewed

	@@ -0,0 +1,35 @@

+/**
+ * A focus selection that GPUs can be matched against.
+ *
+ * In `matchesLinkedFocus`, a `null` value for `stage`, `tpLane` or `cpShard`
+ * means "do not filter on this field". `source`, `label` and `sequenceBand`
+ * are not consulted by that matcher.
+ */
+export type LinkedFocus = {
+  source: 'transformer'
+  label: string
+  stage: number | null
+  tpLane: number | null
+  cpShard: number | null
+  sequenceBand: number | null
+}
+/** The subset of GPU fields that `matchesLinkedFocus` reads. */
+type FocusableGpu = {
+  active: boolean
+  stage: number
+  tpLane: number
+  cpShard: number
+}
+/**
+ * Tells whether `gpu` falls inside the given focus selection.
+ *
+ * Returns `false` when there is no focus or the GPU is inactive. Otherwise
+ * every non-null focus field among `stage`, `tpLane` and `cpShard` must equal
+ * the GPU's value; null focus fields match anything.
+ *
+ * @param gpu GPU to test.
+ * @param focus Current focus selection, or `null` when nothing is focused.
+ * @returns `true` when the GPU is active and matches every constrained field.
+ */
+export function matchesLinkedFocus(gpu: FocusableGpu, focus: LinkedFocus | null) {
+  if (!focus || !gpu.active) {
+    return false
+  }
+  const stageMatches = focus.stage === null || gpu.stage === focus.stage
+  const laneMatches = focus.tpLane === null || gpu.tpLane === focus.tpLane
+  const shardMatches = focus.cpShard === null || gpu.cpShard === focus.cpShard
+  return stageMatches && laneMatches && shardMatches
+}

src/lib/topologyLod.ts ADDED Viewed

	@@ -0,0 +1,216 @@

+/** Names of the detail bands, listed in the same order as `BAND_ORDER`. */
+export type TopologyLodBand =
+  | 'overview'
+  | 'board'
+  | 'package'
+  | 'silicon'
+  | 'micro'
+/** One numeric weight per band. */
+export type TopologyLodWeights = Record<TopologyLodBand, number>
+/**
+ * Scale limits plus, for every band, the four scale thresholds that
+ * `bandWeight` uses to ramp the band in and back out.
+ */
+export type TopologyLodPolicy = {
+  minScale: number
+  maxScale: number
+  bands: Record<
+    TopologyLodBand,
+    {
+      fadeInStart: number
+      fadeInEnd: number
+      fadeOutStart: number
+      fadeOutEnd: number
+    }
+  >
+}
+/** Result of `getTopologyLodState` for a single scale value. */
+export type TopologyLodState = {
+  scale: number
+  primaryBand: TopologyLodBand
+  weights: TopologyLodWeights
+  deepIsolation: number
+  textScale: number
+}
+/** Limits `value` to `[min, max]`: it is first raised to `min`, then capped at `max`. */
+const clamp = (value: number, min: number, max: number) => {
+  const raised = Math.max(value, min)
+  return Math.min(raised, max)
+}
+/**
+ * Scale thresholds for each detail band.
+ *
+ * `createExclusiveWeights` reads only the `fadeInStart`/`fadeInEnd` pairs of
+ * `board`, `package`, `silicon` and `micro`; `bandWeight` uses all four
+ * thresholds of whichever band it is given. `micro` has identical
+ * `fadeOutStart` and `fadeOutEnd`, which `bandWeight` treats as "never fades out".
+ */
+export const TOPOLOGY_LOD_POLICY: TopologyLodPolicy = {
+  minScale: 0.035,
+  maxScale: 420,
+  bands: {
+    overview: {
+      fadeInStart: 0.035,
+      fadeInEnd: 0.06,
+      fadeOutStart: 1.4,
+      fadeOutEnd: 3.8,
+    },
+    board: {
+      fadeInStart: 0.12,
+      fadeInEnd: 0.55,
+      fadeOutStart: 7,
+      fadeOutEnd: 18,
+    },
+    package: {
+      fadeInStart: 1.8,
+      fadeInEnd: 4.8,
+      fadeOutStart: 28,
+      fadeOutEnd: 70,
+    },
+    silicon: {
+      fadeInStart: 10,
+      fadeInEnd: 24,
+      fadeOutStart: 95,
+      fadeOutEnd: 220,
+    },
+    micro: {
+      fadeInStart: 36,
+      fadeInEnd: 95,
+      fadeOutStart: 420,
+      fadeOutEnd: 420,
+    },
+  },
+}
+/** Clamps `value` to the unit interval `[0, 1]`. */
+export const saturate = (value: number) => {
+  return clamp(value, 0, 1)
+}
+/** Applies the cubic ease `t² · (3 − 2t)` to `value` after clamping it to `[0, 1]`. */
+export const smoothstep = (value: number) => {
+  const t = saturate(value)
+  return t * t * (3 - 2 * t)
+}
+/**
+ * Eased 0→1 ramp of `scale` across the interval from `start` to `end`.
+ *
+ * When `start` equals `end` the ramp degenerates into a hard step: 1 at or
+ * above the threshold, 0 below it.
+ */
+export const fadeBetween = (scale: number, start: number, end: number) => {
+  if (start !== end) {
+    return smoothstep((scale - start) / (end - start))
+  }
+  return scale >= end ? 1 : 0
+}
+/**
+ * Weight of a single band at `scale`: the fade-in ramp multiplied by the
+ * inverse of the fade-out ramp, clamped to `[0, 1]`.
+ *
+ * A band whose `fadeOutStart` equals its `fadeOutEnd` never fades out.
+ *
+ * @param scale Scale value to evaluate.
+ * @param band The four fade thresholds of the band.
+ */
+export const bandWeight = (
+  scale: number,
+  band: TopologyLodPolicy['bands'][TopologyLodBand],
+) => {
+  const rampUp = fadeBetween(scale, band.fadeInStart, band.fadeInEnd)
+  const neverFadesOut = band.fadeOutStart === band.fadeOutEnd
+  const rampDown = neverFadesOut
+    ? 1
+    : 1 - fadeBetween(scale, band.fadeOutStart, band.fadeOutEnd)
+  return saturate(rampUp * rampDown)
+}
+/** Order in which `getTopologyLodState` searches for the first band whose weight is at least 0.5. */
+const BAND_ORDER: TopologyLodBand[] = [
+  'overview',
+  'board',
+  'package',
+  'silicon',
+  'micro',
+]
+/**
+ * Computes band weights in which at most two neighbouring bands are non-zero
+ * and the weights always sum to 1.
+ *
+ * The scale axis is cut into four hand-overs (overview→board, board→package,
+ * package→silicon, silicon→micro), each spanning the target band's
+ * `fadeInStart`..`fadeInEnd`. Below a hand-over the source band has weight 1,
+ * inside it the two bands cross-fade with `fadeBetween`, and between two
+ * hand-overs (or above the last one) the target band has weight 1.
+ *
+ * @param scale Scale value to evaluate.
+ * @returns A fresh weights record.
+ */
+const createExclusiveWeights = (scale: number): TopologyLodWeights => {
+  const weights: TopologyLodWeights = {
+    overview: 0,
+    board: 0,
+    package: 0,
+    silicon: 0,
+    micro: 0,
+  }
+  const bands = TOPOLOGY_LOD_POLICY.bands
+  const transitions: Array<{
+    from: TopologyLodBand
+    to: TopologyLodBand
+    start: number
+    end: number
+  }> = [
+    { from: 'overview', to: 'board', start: bands.board.fadeInStart, end: bands.board.fadeInEnd },
+    { from: 'board', to: 'package', start: bands.package.fadeInStart, end: bands.package.fadeInEnd },
+    { from: 'package', to: 'silicon', start: bands.silicon.fadeInStart, end: bands.silicon.fadeInEnd },
+    { from: 'silicon', to: 'micro', start: bands.micro.fadeInStart, end: bands.micro.fadeInEnd },
+  ]
+  for (let index = 0; index < transitions.length; index += 1) {
+    const transition = transitions[index]
+    // Not yet reached this hand-over: the source band is fully visible.
+    if (scale < transition.start) {
+      weights[transition.from] = 1
+      return weights
+    }
+    // Inside the hand-over: cross-fade source and target.
+    if (scale <= transition.end) {
+      const alpha = fadeBetween(scale, transition.start, transition.end)
+      weights[transition.from] = 1 - alpha
+      weights[transition.to] = alpha
+      return weights
+    }
+    // Past this hand-over: settle on the target band unless the next hand-over
+    // has already begun (after the last hand-over the target is `micro`).
+    const next = transitions[index + 1]
+    if (!next || scale < next.start) {
+      weights[transition.to] = 1
+      return weights
+    }
+  }
+  // Only reachable if `transitions` were empty; kept as a defined fallback.
+  weights.micro = 1
+  return weights
+}
+/**
+ * Derives the full detail-level state for a scale value.
+ *
+ * `primaryBand` is the first band in `BAND_ORDER` whose weight is at least
+ * 0.5; failing that, the band with the largest weight; failing that,
+ * `'overview'`. `deepIsolation` ramps from 0 to 1 between scales 22 and 80, and
+ * `textScale` drops from 1 to 0.6 between scales 14 and 36.
+ *
+ * @param scale Scale value to evaluate (stored unchanged in the result).
+ */
+export const getTopologyLodState = (scale: number): TopologyLodState => {
+  const weights = createExclusiveWeights(scale)
+  // Evaluated lazily: only needed when no band reaches the 0.5 threshold.
+  const pickHeaviestBand = () => {
+    const ranked = Object.entries(weights).sort((left, right) => right[1] - left[1])
+    return ranked[0]?.[0] as TopologyLodBand | undefined
+  }
+  const primaryBand =
+    BAND_ORDER.find((band) => weights[band] >= 0.5) ?? pickHeaviestBand() ?? 'overview'
+  const deepIsolation = fadeBetween(scale, 22, 80)
+  const textScale = 1 - fadeBetween(scale, 14, 36) * 0.4
+  return { scale, primaryBand, weights, deepIsolation, textScale }
+}
+/**
+ * Divides `pixels` by `scale` and clamps the quotient to `[minWorld, maxWorld]`.
+ *
+ * The divisor is floored at 0.001 so a zero or negative scale cannot divide by zero.
+ */
+export const screenWorld = (
+  scale: number,
+  pixels: number,
+  minWorld = 0.04,
+  maxWorld = 3.5,
+) => {
+  const divisor = Math.max(scale, 0.001)
+  return clamp(pixels / divisor, minWorld, maxWorld)
+}
+/** Same computation as `screenWorld`, but with a default upper bound of 2.8 instead of 3.5. */
+export const screenStroke = (
+  scale: number,
+  pixels: number,
+  minWorld = 0.04,
+  maxWorld = 2.8,
+) => {
+  return screenWorld(scale, pixels, minWorld, maxWorld)
+}
+/** Linear interpolation: returns `from` at `alpha` 0 and `to` at `alpha` 1 (`alpha` is not clamped). */
+export const mix = (from: number, to: number, alpha: number) => {
+  return from + (to - from) * alpha
+}

src/lib/topologyScene.ts ADDED Viewed

	@@ -0,0 +1,980 @@

+import { type WorkbenchViewModel } from './workbenchPresenter'
+import { TOPOLOGY_LOD_POLICY, type TopologyLodPolicy } from './topologyLod'
+// NOTE(review): the consumers of ViewportState are outside this excerpt — confirm
+// whether x/y are a screen-space or world-space offset.
+export type ViewportState = {
+  x: number
+  y: number
+  scale: number
+}
+/** Kinds of scene object that can be referenced by `HoverTarget` and `TargetDetails`. */
+export type HitTargetKind = 'pod' | 'node' | 'gpu' | 'link'
+/** Discriminator stored in `SceneLink.kind`. */
+export type LinkKind = 'row' | 'column' | 'bus'
+/** Axis-aligned rectangle; `pointInBounds` treats all four edges as inclusive. */
+export type SceneHitBounds = {
+  x: number
+  y: number
+  width: number
+  height: number
+}
+/** Rectangle with the same shape as `SceneHitBounds`, used for the `focusFrame`/`lodFrame` fields. */
+export type SceneAnchorFrame = {
+  x: number
+  y: number
+  width: number
+  height: number
+}
+/** Scene entry for a pod: its rectangle, centre point, load figures and hit/focus rectangles. */
+export type ScenePod = {
+  id: string
+  kind: 'pod'
+  index: number
+  x: number
+  y: number
+  width: number
+  height: number
+  centerX: number
+  centerY: number
+  active: boolean
+  load: number
+  thermal: number
+  activeGpus: number
+  totalGpus: number
+  title: string
+  hitBounds: SceneHitBounds
+  focusFrame: SceneAnchorFrame
+}
+/** Scene entry for a rack; `nodeIds` lists ids of nodes (presumably the rack's members — confirm in the builder). */
+export type SceneRack = {
+  id: string
+  index: number
+  x: number
+  y: number
+  width: number
+  height: number
+  load: number
+  nodeIds: string[]
+  hitBounds: SceneHitBounds
+  focusFrame: SceneAnchorFrame
+}
+/**
+ * Scene entry for a single GPU: placement indices, rectangle, parallelism
+ * coordinates, load/memory figures and drawing attributes.
+ *
+ * The `active`, `stage`, `tpLane` and `cpShard` fields are the ones
+ * `matchesLinkedFocus` (src/lib/linkedFocus.ts) reads from a GPU.
+ */
+export type SceneGpu = {
+  id: string
+  kind: 'gpu'
+  nodeId: string
+  nodeIndex: number
+  domainIndex: number
+  domainLocalIndex: number
+  columnIndex: number
+  rowIndex: number
+  globalIndex: number
+  localIndex: number
+  x: number
+  y: number
+  width: number
+  height: number
+  active: boolean
+  stage: number
+  tpLane: number
+  cpShard: number
+  epLane: number
+  dpReplica: number
+  replicaGroup: number
+  fsdpRank: number
+  utilization: number
+  linkLoad: number
+  memoryUsedGB: number
+  memoryCapacityGB: number
+  fillColor: number
+  outlineAlpha: number
+  title: string
+  hitBounds: SceneHitBounds
+  focusFrame: SceneAnchorFrame
+  lodFrame: SceneAnchorFrame
+}
+/** Scene entry for a node: placement indices, rectangle, hub/bus coordinates, load figures and its GPUs. */
+export type SceneNode = {
+  id: string
+  kind: 'node'
+  index: number
+  domainIndex: number
+  domainLocalIndex: number
+  columnIndex: number
+  rowIndex: number
+  x: number
+  y: number
+  width: number
+  height: number
+  hubX: number
+  hubY: number
+  busX1: number
+  busX2: number
+  activeCount: number
+  localFabricLoad: number
+  interNodeLoad: number
+  gpus: SceneGpu[]
+  hitBounds: SceneHitBounds
+  focusFrame: SceneAnchorFrame
+}
+/**
+ * A line segment from (`x1`, `y1`) to (`x2`, `y2`) with load, colour and traffic metadata.
+ *
+ * The `trafficType` values are exactly the keys of `trafficColorMap`.
+ */
+export type SceneLink = {
+  id: string
+  kind: LinkKind
+  scope: 'node' | 'rack'
+  x1: number
+  y1: number
+  x2: number
+  y2: number
+  load: number
+  color: number
+  width: number
+  hitWidth: number
+  title: string
+  trafficType: 'tp' | 'pp' | 'cp' | 'fsdp' | 'ep' | 'dp'
+  transport: 'nvlink' | 'infiniband'
+  volumeGB: number
+}
+/** Reference to a scene object by kind and id. */
+export type HoverTarget = {
+  kind: HitTargetKind
+  id: string
+}
+/** Display data for one target: two heading strings and a list of label/value metric rows. */
+export type TargetDetails = {
+  kind: HitTargetKind
+  id: string
+  heading: string
+  subheading: string
+  metrics: Array<{ label: string; value: string }>
+}
+/**
+ * The complete scene returned by `buildTopologySceneModel`: overall
+ * dimensions, grid and tile sizes, the LOD policy, and the pod, rack, node and
+ * link collections together with summary object counts.
+ */
+export type TopologySceneModel = {
+  width: number
+  height: number
+  podColumns: number
+  podRows: number
+  podWidth: number
+  podHeight: number
+  nodeWidth: number
+  nodeHeight: number
+  activePodId: string
+  activePodBounds: SceneHitBounds
+  contextualNodeCount: number
+  lodPolicy: TopologyLodPolicy
+  pods: ScenePod[]
+  racks: SceneRack[]
+  nodes: SceneNode[]
+  rowLinks: SceneLink[]
+  columnLinks: SceneLink[]
+  busLinks: SceneLink[]
+  objectCounts: {
+    pods: number
+    nodes: number
+    gpus: number
+    links: number
+    activeGpus: number
+    contextualNodes: number
+  }
+}
+/** Limits `value` to `[min, max]`: it is first raised to `min`, then capped at `max`. */
+const clamp = (value: number, min: number, max: number) => {
+  const raised = Math.max(value, min)
+  return Math.min(raised, max)
+}
+/** Tests whether the point (`x`, `y`) lies inside `bounds`; points on an edge count as inside. */
+const pointInBounds = (bounds: SceneHitBounds, x: number, y: number) => {
+  const withinX = x >= bounds.x && x <= bounds.x + bounds.width
+  const withinY = y >= bounds.y && y <= bounds.y + bounds.height
+  return withinX && withinY
+}
+/**
+ * Euclidean distance from the point (`x`, `y`) to the segment running from
+ * (`x1`, `y1`) to (`x2`, `y2`).
+ *
+ * The point is projected onto the segment's line and the projection parameter
+ * is clamped to `[0, 1]`, so points beyond either end measure to that
+ * endpoint. A zero-length segment measures to (`x1`, `y1`).
+ */
+const distanceToSegment = (
+  x: number,
+  y: number,
+  x1: number,
+  y1: number,
+  x2: number,
+  y2: number,
+) => {
+  const toPointX = x - x1
+  const toPointY = y - y1
+  const segmentX = x2 - x1
+  const segmentY = y2 - y1
+  const projection = toPointX * segmentX + toPointY * segmentY
+  const segmentLengthSquared = segmentX * segmentX + segmentY * segmentY
+  // A degenerate segment has no direction; pin the nearest point to its start.
+  const t = segmentLengthSquared === 0 ? 0 : clamp(projection / segmentLengthSquared, 0, 1)
+  const nearestX = x1 + t * segmentX
+  const nearestY = y1 + t * segmentY
+  const offsetX = x - nearestX
+  const offsetY = y - nearestY
+  return Math.sqrt(offsetX * offsetX + offsetY * offsetY)
+}
+/** Formats a fraction as a whole-number percentage string, e.g. `0.256` → `"26%"`. */
+const percent = (value: number) => {
+  const rounded = Math.round(value * 100)
+  return `${rounded}%`
+}
+// Colours handed out by getStageColor, which indexes this list by stage index modulo its length.
+const stagePalette = [0x6be5d2, 0xf2b36a, 0x8fbcff, 0xf28ac6, 0xb9e769, 0xc19cff]
+// One numeric colour per SceneLink traffic type.
+const trafficColorMap: Record<SceneLink['trafficType'], number> = {
+  tp: 0x7ce9da,
+  pp: 0xf1b067,
+  cp: 0x77a8f1,
+  fsdp: 0xb0a0ff,
+  ep: 0xb9e769,
+  dp: 0xf18888,
+}
+/**
+ * Picks the palette colour for a stage, cycling through `stagePalette`.
+ *
+ * JavaScript's `%` keeps the sign of the dividend, so a plain
+ * `stageIndex % length` would index the palette with a negative number (and
+ * yield `undefined`) for negative stage indices. The index is therefore
+ * wrapped into `[0, length)` first; non-negative indices behave as before.
+ *
+ * @param stageIndex Stage index.
+ * @returns The numeric colour from `stagePalette`.
+ */
+function getStageColor(stageIndex: number) {
+  const paletteSize = stagePalette.length
+  const wrappedIndex = ((stageIndex % paletteSize) + paletteSize) % paletteSize
+  return stagePalette[wrappedIndex]
+}
+/**
+ * Derives the grid dimensions used to lay out racks, nodes and GPUs.
+ *
+ * Racks are arranged in a near-square grid, nodes inside a rack in a grid that
+ * is wider than tall, and GPUs inside a node in 2 columns (up to 4 GPUs) or 4
+ * columns (more than 4 GPUs).
+ *
+ * A missing `nodesPerRack` means "all nodes in one rack". A `nodesPerRack`
+ * below 1 (or non-finite) is treated the same way, and an empty cluster yields
+ * zero racks; previously both cases divided by zero and produced NaN/Infinity
+ * grid sizes.
+ *
+ * @param cluster Cluster configuration; reads `numNodes`, `nodesPerRack` and
+ *   `gpusPerNode`.
+ * @returns Rack, node and GPU grid dimensions plus the effective `nodesPerRack`.
+ */
+function createNodeLayout(cluster: WorkbenchViewModel['config']['cluster']) {
+  const nodeCount = Number.isFinite(cluster.numNodes) ? Math.max(cluster.numNodes, 0) : 0
+  const requestedNodesPerRack = cluster.nodesPerRack ?? nodeCount
+  // Guard the divisor below: fall back to a single rack holding every node.
+  const nodesPerRack =
+    Number.isFinite(requestedNodesPerRack) && requestedNodesPerRack >= 1
+      ? requestedNodesPerRack
+      : Math.max(nodeCount, 1)
+  const rackCount = Math.ceil(nodeCount / nodesPerRack)
+  const rackColumns = Math.max(1, Math.ceil(Math.sqrt(rackCount)))
+  const rackRows = Math.max(1, Math.ceil(rackCount / rackColumns))
+  // The factor 2 biases the node grid towards more columns than rows.
+  const nodeColumns = Math.max(1, Math.ceil(Math.sqrt(nodesPerRack * 2)))
+  const nodeRows = Math.max(1, Math.ceil(nodesPerRack / nodeColumns))
+  const gpuColumns = cluster.gpusPerNode <= 4 ? 2 : 4
+  const gpuRows = Math.ceil(cluster.gpusPerNode / gpuColumns)
+  return {
+    nodesPerRack,
+    rackCount,
+    rackColumns,
+    rackRows,
+    nodeColumns,
+    nodeRows,
+    gpuColumns,
+    gpuRows,
+  }
+}
+/**
+ * Builds the 2D scene description (racks/pods, nodes, GPUs and traffic links)
+ * for the topology view from a workbench view model.
+ *
+ * Layout is a nested grid: racks tile the cluster, nodes tile each rack and
+ * GPUs tile each node. Per-GPU links from `analysis.links` are aggregated into
+ * one scene link per (endpoint pair, traffic type, transport): links that
+ * cross racks become rack-to-rack links, the rest become node-to-node links
+ * or, when both endpoints share a node, intra-node bus links.
+ *
+ * @param viewModel Workbench view model; `config.cluster` and `analysis`
+ *   (`gpuMap`, `links`) are read.
+ * @returns The scene model with geometry, averaged loads and object counts.
+ */
+export function buildTopologySceneModel(viewModel: WorkbenchViewModel): TopologySceneModel {
+  const { config, analysis } = viewModel
+  const cluster = config.cluster
+  // Fixed layout metrics, in scene units.
+  const clusterPaddingX = 72
+  const clusterPaddingY = 72
+  const rackGapX = 56
+  const rackGapY = 58
+  const rackPaddingX = 20
+  const rackPaddingY = 24
+  const nodeGapX = 12
+  const nodeGapY = 14
+  const layout = createNodeLayout(cluster)
+  const gpuWidth = 14
+  const gpuHeight = 10
+  const gpuGapX = 6
+  const gpuGapY = 6
+  // Each level's size is its padding plus its children's grid (cells + gaps).
+  const nodeWidth =
+    28 + layout.gpuColumns * gpuWidth + Math.max(layout.gpuColumns - 1, 0) * gpuGapX
+  const nodeHeight =
+    26 + layout.gpuRows * gpuHeight + Math.max(layout.gpuRows - 1, 0) * gpuGapY
+  const rackWidth =
+    rackPaddingX * 2 +
+    layout.nodeColumns * nodeWidth +
+    Math.max(layout.nodeColumns - 1, 0) * nodeGapX
+  const rackHeight =
+    rackPaddingY * 2 +
+    layout.nodeRows * nodeHeight +
+    Math.max(layout.nodeRows - 1, 0) * nodeGapY
+  const width =
+    clusterPaddingX * 2 +
+    layout.rackColumns * rackWidth +
+    Math.max(layout.rackColumns - 1, 0) * rackGapX
+  const height =
+    clusterPaddingY * 2 +
+    layout.rackRows * rackHeight +
+    Math.max(layout.rackRows - 1, 0) * rackGapY
+  // One pod per rack, placed row-major in the rack grid. Load and GPU counts
+  // start at zero and are filled in once nodes and links have been processed.
+  const pods: ScenePod[] = Array.from({ length: layout.rackCount }, (_, rackIndex) => {
+    const column = rackIndex % layout.rackColumns
+    const row = Math.floor(rackIndex / layout.rackColumns)
+    const x = clusterPaddingX + column * (rackWidth + rackGapX)
+    const y = clusterPaddingY + row * (rackHeight + rackGapY)
+    return {
+      id: `pod-${rackIndex}`,
+      kind: 'pod',
+      index: rackIndex,
+      x,
+      y,
+      width: rackWidth,
+      height: rackHeight,
+      centerX: x + rackWidth / 2,
+      centerY: y + rackHeight / 2,
+      active: false,
+      load: 0,
+      thermal: 0,
+      activeGpus: 0,
+      totalGpus: 0,
+      title: `${cluster.rackLabel ?? 'rack'} ${rackIndex + 1}`,
+      hitBounds: {
+        x,
+        y,
+        width: rackWidth,
+        height: rackHeight,
+      },
+      focusFrame: {
+        x: x + 5,
+        y: y + 5,
+        width: rackWidth - 10,
+        height: rackHeight - 10,
+      },
+    }
+  })
+  const nodeByIndex = new Map<number, SceneNode>()
+  const gpuByGlobalIndex = new Map<number, SceneGpu>()
+  const rawGpuMap = [...analysis.gpuMap].sort((left, right) => left.globalGPUIndex - right.globalGPUIndex)
+  const rawGpuByGlobalIndex = new Map(
+    rawGpuMap.map((gpu) => [gpu.globalGPUIndex, gpu] as const),
+  )
+  // Group GPUs by node once, instead of scanning the full GPU list per node.
+  const rawGpusByNodeIndex = new Map<number, typeof rawGpuMap>()
+  for (const gpu of rawGpuMap) {
+    const bucket = rawGpusByNodeIndex.get(gpu.nodeIndex)
+    if (bucket) {
+      bucket.push(gpu)
+    } else {
+      rawGpusByNodeIndex.set(gpu.nodeIndex, [gpu])
+    }
+  }
+  for (let nodeIndex = 0; nodeIndex < cluster.numNodes; nodeIndex += 1) {
+    // domainIndex is the rack; domainLocalIndex is the node's slot in that rack.
+    const domainIndex = Math.floor(nodeIndex / layout.nodesPerRack)
+    const domainLocalIndex = nodeIndex % layout.nodesPerRack
+    const columnIndex = domainLocalIndex % layout.nodeColumns
+    const rowIndex = Math.floor(domainLocalIndex / layout.nodeColumns)
+    const rack = pods[domainIndex]
+    const x = rack.x + rackPaddingX + columnIndex * (nodeWidth + nodeGapX)
+    const y = rack.y + rackPaddingY + rowIndex * (nodeHeight + nodeGapY)
+    const hubX = x + nodeWidth / 2
+    const hubY = y + nodeHeight / 2
+    // Insets that centre the GPU grid inside the node rectangle.
+    const gpuLeftInset =
+      (nodeWidth -
+        (layout.gpuColumns * gpuWidth + Math.max(layout.gpuColumns - 1, 0) * gpuGapX)) /
+      2
+    const gpuTopInset =
+      (nodeHeight -
+        (layout.gpuRows * gpuHeight + Math.max(layout.gpuRows - 1, 0) * gpuGapY)) /
+      2
+    const nodeGpuMap = (rawGpusByNodeIndex.get(nodeIndex) ?? []).sort(
+      (left, right) => left.localGPUIndex - right.localGPUIndex,
+    )
+    const gpus: SceneGpu[] = nodeGpuMap.map((gpu) => {
+      const localColumn = gpu.localGPUIndex % layout.gpuColumns
+      const localRow = Math.floor(gpu.localGPUIndex / layout.gpuColumns)
+      const gpuX = x + gpuLeftInset + localColumn * (gpuWidth + gpuGapX)
+      const gpuY = y + gpuTopInset + localRow * (gpuHeight + gpuGapY)
+      const memoryUtilization = gpu.memoryCapacityGB > 0 ? gpu.memoryUsedGB / gpu.memoryCapacityGB : 0
+      // GPUs with memory in use are coloured by pipeline stage; others get a fixed dim fill.
+      const fillColor = gpu.memoryUsedGB > 0 ? getStageColor(Math.max(gpu.ppStage, 0)) : 0x26404d
+      const outlineAlpha = gpu.memoryUsedGB > 0 ? 0.3 + memoryUtilization * 0.4 : 0.14
+      const sceneGpu = {
+        id: `gpu-${gpu.globalGPUIndex}`,
+        kind: 'gpu' as const,
+        nodeId: `node-${nodeIndex}`,
+        nodeIndex,
+        domainIndex,
+        domainLocalIndex,
+        columnIndex,
+        rowIndex,
+        globalIndex: gpu.globalGPUIndex,
+        localIndex: gpu.localGPUIndex,
+        x: gpuX,
+        y: gpuY,
+        width: gpuWidth,
+        height: gpuHeight,
+        active: gpu.isActive,
+        stage: gpu.ppStage,
+        tpLane: gpu.tpLane,
+        cpShard: gpu.cpShard,
+        epLane: gpu.epLane,
+        dpReplica: gpu.dpReplica,
+        replicaGroup: gpu.replicaGroup,
+        fsdpRank: gpu.fsdpRank,
+        utilization: clamp(memoryUtilization, 0, 1),
+        linkLoad: 0,
+        memoryUsedGB: gpu.memoryUsedGB,
+        memoryCapacityGB: gpu.memoryCapacityGB,
+        fillColor,
+        outlineAlpha,
+        title: `GPU ${gpu.globalGPUIndex + 1}`,
+        // The hit area is 3 units larger than the drawn GPU on every side.
+        hitBounds: {
+          x: gpuX - 3,
+          y: gpuY - 3,
+          width: gpuWidth + 6,
+          height: gpuHeight + 6,
+        },
+        focusFrame: {
+          x: gpuX - 0.8,
+          y: gpuY - 0.8,
+          width: gpuWidth + 1.6,
+          height: gpuHeight + 1.6,
+        },
+        lodFrame: {
+          x: gpuX,
+          y: gpuY,
+          width: gpuWidth,
+          height: gpuHeight,
+        },
+      }
+      gpuByGlobalIndex.set(gpu.globalGPUIndex, sceneGpu)
+      return sceneGpu
+    })
+    const node = {
+      id: `node-${nodeIndex}`,
+      kind: 'node' as const,
+      index: nodeIndex,
+      domainIndex,
+      domainLocalIndex,
+      columnIndex,
+      rowIndex,
+      x,
+      y,
+      width: nodeWidth,
+      height: nodeHeight,
+      hubX,
+      hubY,
+      busX1: x + 8,
+      busX2: x + nodeWidth - 8,
+      // NOTE(review): counted by memory in use, while pod.activeGpus below uses
+      // the isActive flag — confirm the two are meant to differ.
+      activeCount: gpus.filter((gpu) => gpu.memoryUsedGB > 0).length,
+      localFabricLoad: 0,
+      interNodeLoad: 0,
+      gpus,
+      hitBounds: {
+        x,
+        y,
+        width: nodeWidth,
+        height: nodeHeight,
+      },
+      focusFrame: {
+        x: x + 2,
+        y: y + 2,
+        width: nodeWidth - 4,
+        height: nodeHeight - 4,
+      },
+    }
+    nodeByIndex.set(nodeIndex, node)
+  }
+  // Accumulators keyed by "from:to:trafficType:transport", endpoints ordered low-to-high.
+  const aggregateLinks = new Map<
+    string,
+    {
+      fromNode: number
+      toNode: number
+      trafficType: SceneLink['trafficType']
+      transport: SceneLink['transport']
+      loadSum: number
+      volumeSum: number
+      count: number
+    }
+  >()
+  const aggregateRackLinks = new Map<
+    string,
+    {
+      fromRack: number
+      toRack: number
+      trafficType: SceneLink['trafficType']
+      transport: SceneLink['transport']
+      loadSum: number
+      volumeSum: number
+      count: number
+    }
+  >()
+  const addAggregate = (
+    fromNode: number,
+    toNode: number,
+    trafficType: SceneLink['trafficType'],
+    transport: SceneLink['transport'],
+    utilizationPercent: number,
+    volumeGB: number,
+  ) => {
+    const ordered =
+      fromNode <= toNode ? [fromNode, toNode] as const : [toNode, fromNode] as const
+    const key = `${ordered[0]}:${ordered[1]}:${trafficType}:${transport}`
+    const current = aggregateLinks.get(key) ?? {
+      fromNode: ordered[0],
+      toNode: ordered[1],
+      trafficType,
+      transport,
+      loadSum: 0,
+      volumeSum: 0,
+      count: 0,
+    }
+    // Utilization arrives as a percentage; loads are kept as fractions.
+    current.loadSum += utilizationPercent / 100
+    current.volumeSum += volumeGB
+    current.count += 1
+    aggregateLinks.set(key, current)
+  }
+  const addRackAggregate = (
+    fromRack: number,
+    toRack: number,
+    trafficType: SceneLink['trafficType'],
+    transport: SceneLink['transport'],
+    utilizationPercent: number,
+    volumeGB: number,
+  ) => {
+    const ordered =
+      fromRack <= toRack ? ([fromRack, toRack] as const) : ([toRack, fromRack] as const)
+    const key = `${ordered[0]}:${ordered[1]}:${trafficType}:${transport}`
+    const current = aggregateRackLinks.get(key) ?? {
+      fromRack: ordered[0],
+      toRack: ordered[1],
+      trafficType,
+      transport,
+      loadSum: 0,
+      volumeSum: 0,
+      count: 0,
+    }
+    current.loadSum += utilizationPercent / 100
+    current.volumeSum += volumeGB
+    current.count += 1
+    aggregateRackLinks.set(key, current)
+  }
+  // Appends in place rather than copying the whole list on every insertion.
+  const pushLoad = (target: Map<number, number[]>, key: number, value: number) => {
+    const existing = target.get(key)
+    if (existing) {
+      existing.push(value)
+    } else {
+      target.set(key, [value])
+    }
+  }
+  for (const link of analysis.links) {
+    const fromGpu = rawGpuByGlobalIndex.get(link.fromGPU)
+    const toGpu = rawGpuByGlobalIndex.get(link.toGPU)
+    // Links that reference a GPU missing from the GPU map are ignored.
+    if (!fromGpu || !toGpu) {
+      continue
+    }
+    const fromRack = Math.floor(fromGpu.nodeIndex / layout.nodesPerRack)
+    const toRack = Math.floor(toGpu.nodeIndex / layout.nodesPerRack)
+    if (fromRack !== toRack) {
+      addRackAggregate(
+        fromRack,
+        toRack,
+        link.trafficType,
+        link.type,
+        link.utilizationPercent,
+        link.volumeGB,
+      )
+      continue
+    }
+    addAggregate(
+      fromGpu.nodeIndex,
+      toGpu.nodeIndex,
+      link.trafficType,
+      link.type,
+      link.utilizationPercent,
+      link.volumeGB,
+    )
+  }
+  const rowLinks: SceneLink[] = []
+  const columnLinks: SceneLink[] = []
+  const busLinks: SceneLink[] = []
+  const localLoads = new Map<number, number[]>()
+  const interLoads = new Map<number, number[]>()
+  // Bus links emitted so far per node. The previous id-prefix test
+  // (`startsWith('bus-1')`) also matched nodes 10, 11, ... and so shifted
+  // node 1's bus lines by other nodes' links.
+  const busLinkCountByNode = new Map<number, number>()
+  for (const aggregate of aggregateLinks.values()) {
+    const averageLoad = aggregate.count > 0 ? aggregate.loadSum / aggregate.count : 0
+    const averageVolume = aggregate.count > 0 ? aggregate.volumeSum / aggregate.count : 0
+    const color = trafficColorMap[aggregate.trafficType]
+    if (aggregate.fromNode === aggregate.toNode) {
+      const node = nodeByIndex.get(aggregate.fromNode)
+      if (!node) {
+        continue
+      }
+      // Stack this node's bus lines 3 units apart, starting at the hub.
+      const busLinksOnNode = busLinkCountByNode.get(node.index) ?? 0
+      busLinkCountByNode.set(node.index, busLinksOnNode + 1)
+      const offset = busLinksOnNode * 3
+      // NOTE(review): the id omits the transport although the aggregation key
+      // includes it, so two transports with the same traffic type on one node
+      // would share an id — confirm this cannot occur.
+      busLinks.push({
+        id: `bus-${node.index}-${aggregate.trafficType}`,
+        kind: 'bus',
+        scope: 'node',
+        x1: node.busX1,
+        y1: node.hubY + offset,
+        x2: node.busX2,
+        y2: node.hubY + offset,
+        load: averageLoad,
+        color,
+        width: 0.9 + averageLoad * 2,
+        hitWidth: 10,
+        title: `${aggregate.trafficType.toUpperCase()} ${aggregate.transport} on ${cluster.nodeLabel ?? 'node'} ${node.index + 1}`,
+        trafficType: aggregate.trafficType,
+        transport: aggregate.transport,
+        volumeGB: roundVolume(averageVolume),
+      })
+      pushLoad(localLoads, node.index, averageLoad)
+      continue
+    }
+    const fromNode = nodeByIndex.get(aggregate.fromNode)
+    const toNode = nodeByIndex.get(aggregate.toNode)
+    if (!fromNode || !toNode) {
+      continue
+    }
+    const sceneLink = {
+      id: `link-${aggregate.fromNode}-${aggregate.toNode}-${aggregate.trafficType}`,
+      // Classified by dominant direction; ties count as 'row'.
+      kind:
+        Math.abs(fromNode.hubX - toNode.hubX) >= Math.abs(fromNode.hubY - toNode.hubY)
+          ? ('row' as const)
+          : ('column' as const),
+      scope: 'node' as const,
+      x1: fromNode.hubX,
+      y1: fromNode.hubY,
+      x2: toNode.hubX,
+      y2: toNode.hubY,
+      load: averageLoad,
+      color,
+      width: 1 + averageLoad * 2.6,
+      hitWidth: aggregate.transport === 'infiniband' ? 18 : 12,
+      title:
+        `${aggregate.trafficType.toUpperCase()} ${aggregate.transport} between ` +
+        `${cluster.nodeLabel ?? 'node'} ${fromNode.index + 1} and ${cluster.nodeLabel ?? 'node'} ${toNode.index + 1}`,
+      trafficType: aggregate.trafficType,
+      transport: aggregate.transport,
+      volumeGB: roundVolume(averageVolume),
+    }
+    if (sceneLink.kind === 'row') {
+      rowLinks.push(sceneLink)
+    } else {
+      columnLinks.push(sceneLink)
+    }
+    // NVLink traffic counts towards the local fabric load, everything else towards inter-node load.
+    const targetMap = aggregate.transport === 'nvlink' ? localLoads : interLoads
+    pushLoad(targetMap, fromNode.index, averageLoad)
+    pushLoad(targetMap, toNode.index, averageLoad)
+  }
+  for (const aggregate of aggregateRackLinks.values()) {
+    const averageLoad = aggregate.count > 0 ? aggregate.loadSum / aggregate.count : 0
+    const averageVolume = aggregate.count > 0 ? aggregate.volumeSum / aggregate.count : 0
+    const color = trafficColorMap[aggregate.trafficType]
+    const fromRack = pods[aggregate.fromRack]
+    const toRack = pods[aggregate.toRack]
+    if (!fromRack || !toRack) {
+      continue
+    }
+    const sceneLink = {
+      id: `rack-link-${aggregate.fromRack}-${aggregate.toRack}-${aggregate.trafficType}`,
+      kind:
+        Math.abs(fromRack.centerX - toRack.centerX) >= Math.abs(fromRack.centerY - toRack.centerY)
+          ? ('row' as const)
+          : ('column' as const),
+      scope: 'rack' as const,
+      x1: fromRack.centerX,
+      y1: fromRack.centerY,
+      x2: toRack.centerX,
+      y2: toRack.centerY,
+      load: averageLoad,
+      color,
+      width: 1.6 + averageLoad * 3.2,
+      hitWidth: 22,
+      title:
+        `${aggregate.trafficType.toUpperCase()} ${aggregate.transport} between ` +
+        `${cluster.rackLabel ?? 'rack'} ${aggregate.fromRack + 1} and ${cluster.rackLabel ?? 'rack'} ${aggregate.toRack + 1}`,
+      trafficType: aggregate.trafficType,
+      transport: aggregate.transport,
+      volumeGB: roundVolume(averageVolume),
+    }
+    if (sceneLink.kind === 'row') {
+      rowLinks.push(sceneLink)
+    } else {
+      columnLinks.push(sceneLink)
+    }
+    // A rack-to-rack link contributes its load to every node in both racks.
+    for (const node of nodeByIndex.values()) {
+      if (node.domainIndex === aggregate.fromRack || node.domainIndex === aggregate.toRack) {
+        pushLoad(interLoads, node.index, averageLoad)
+      }
+    }
+  }
+  for (const node of nodeByIndex.values()) {
+    node.localFabricLoad = average(localLoads.get(node.index) ?? [])
+    node.interNodeLoad = average(interLoads.get(node.index) ?? [])
+    for (const gpu of node.gpus) {
+      gpu.linkLoad = clamp(node.localFabricLoad * 0.7 + node.interNodeLoad * 0.6, 0, 1)
+    }
+  }
+  for (const pod of pods) {
+    const rackNodes = Array.from(nodeByIndex.values()).filter((node) => node.domainIndex === pod.index)
+    // pod.load averages the nodes' inter-node load; pod.thermal averages their local fabric load.
+    pod.load = average(rackNodes.map((node) => node.interNodeLoad))
+    pod.thermal = average(rackNodes.map((node) => node.localFabricLoad))
+    pod.activeGpus = rackNodes.reduce((sum, node) => sum + node.gpus.filter((gpu) => gpu.active).length, 0)
+    pod.totalGpus = rackNodes.reduce((sum, node) => sum + node.gpus.length, 0)
+  }
+  // The active pod is the one with the most active GPUs; ties keep the lowest index.
+  const activePod =
+    [...pods].sort((left, right) => right.activeGpus - left.activeGpus)[0] ?? pods[0]
+  for (const pod of pods) {
+    pod.active = pod.id === activePod?.id
+  }
+  const racks: SceneRack[] = pods.map((pod) => ({
+    id: `rack-${pod.index}`,
+    index: pod.index,
+    x: pod.x,
+    y: pod.y,
+    width: pod.width,
+    height: pod.height,
+    load: pod.load,
+    nodeIds: Array.from(nodeByIndex.values())
+      .filter((node) => node.domainIndex === pod.index)
+      .map((node) => node.id),
+    hitBounds: pod.hitBounds,
+    focusFrame: pod.focusFrame,
+  }))
+  const nodes = [...nodeByIndex.values()].sort((left, right) => left.index - right.index)
+  return {
+    width,
+    height,
+    podColumns: layout.rackColumns,
+    podRows: layout.rackRows,
+    podWidth: rackWidth,
+    podHeight: rackHeight,
+    nodeWidth,
+    nodeHeight,
+    activePodId: activePod?.id ?? 'pod-0',
+    // With no pods at all, fall back to the full scene rectangle.
+    activePodBounds: activePod?.hitBounds ?? {
+      x: 0,
+      y: 0,
+      width,
+      height,
+    },
+    contextualNodeCount: cluster.numNodes,
+    lodPolicy: TOPOLOGY_LOD_POLICY,
+    pods,
+    racks,
+    nodes,
+    rowLinks,
+    columnLinks,
+    busLinks,
+    objectCounts: {
+      pods: pods.length,
+      nodes: nodes.length,
+      gpus: nodes.reduce((sum, node) => sum + node.gpus.length, 0),
+      links: rowLinks.length + columnLinks.length + busLinks.length,
+      // Counted from the raw GPU map, not from the GPUs placed in the scene.
+      activeGpus: analysis.gpuMap.filter((gpu) => gpu.isActive).length,
+      contextualNodes: cluster.numNodes,
+    },
+  }
+}
+/** Returns the arithmetic mean of `values`, or 0 for an empty list. */
+function average(values: number[]) {
+  const count = values.length
+  if (count === 0) {
+    return 0
+  }
+  let total = 0
+  for (const value of values) {
+    total += value
+  }
+  return total / count
+}
+/** Rounds `value` to two decimal places. */
+function roundVolume(value: number) {
+  const hundredths = Math.round(value * 100)
+  return hundredths / 100
+}
+/**
+ * Finds the scene element at scene coordinates (`x`, `y`).
+ *
+ * Candidates are tested in a fixed priority order — GPUs, then nodes, then
+ * pods, then links (row, column, bus) — and the first match wins. Links match
+ * when the point is within half their `hitWidth` of the segment.
+ *
+ * NOTE(review): because nodes and pods are tested before links, a link that
+ * lies inside a node or pod rectangle appears unreachable here — confirm that
+ * this ordering is intended.
+ *
+ * @returns The matched target, or `null` when nothing is hit.
+ */
+export function findHoverTarget(
+  model: TopologySceneModel,
+  x: number,
+  y: number,
+): HoverTarget | null {
+  for (const node of model.nodes) {
+    const hitGpu = node.gpus.find((gpu) => pointInBounds(gpu.hitBounds, x, y))
+    if (hitGpu) {
+      return { kind: 'gpu', id: hitGpu.id }
+    }
+  }
+  const hitNode = model.nodes.find((node) => pointInBounds(node.hitBounds, x, y))
+  if (hitNode) {
+    return { kind: 'node', id: hitNode.id }
+  }
+  const hitPod = model.pods.find((pod) => pointInBounds(pod.hitBounds, x, y))
+  if (hitPod) {
+    return { kind: 'pod', id: hitPod.id }
+  }
+  for (const linkGroup of [model.rowLinks, model.columnLinks, model.busLinks]) {
+    const hitLink = linkGroup.find(
+      (link) => distanceToSegment(x, y, link.x1, link.y1, link.x2, link.y2) <= link.hitWidth / 2,
+    )
+    if (hitLink) {
+      return { kind: 'link', id: hitLink.id }
+    }
+  }
+  return null
+}
+/**
+ * Produces the heading, subheading and metric rows describing a hover target.
+ *
+ * @param model Scene model in which the target is looked up by id.
+ * @param viewModel Source of the cluster labels and `gpusPerNode`.
+ * @param target Target to describe; `null` yields `null`.
+ * @returns The details, or `null` when the target is `null` or its id is not
+ *   found in the model. Any kind other than 'pod', 'gpu' or 'node' is looked
+ *   up as a link.
+ */
+export function describeTarget(
+  model: TopologySceneModel,
+  viewModel: WorkbenchViewModel,
+  target: HoverTarget | null,
+): TargetDetails | null {
+  if (!target) {
+    return null
+  }
+  const targetId = target.id
+  const cluster = viewModel.config.cluster
+  const rackLabel = cluster.rackLabel ?? 'rack'
+  const nodeLabel = cluster.nodeLabel ?? 'node'
+  switch (target.kind) {
+    case 'pod': {
+      const pod = model.pods.find((candidate) => candidate.id === targetId)
+      if (!pod) {
+        return null
+      }
+      const nodeCount = Math.ceil(pod.totalGpus / cluster.gpusPerNode)
+      return {
+        kind: 'pod',
+        id: pod.id,
+        heading: `${rackLabel} ${pod.index + 1}`,
+        subheading: `${pod.totalGpus} GPUs laid out across ${nodeCount} ${nodeLabel}s.`,
+        metrics: [
+          { label: 'Active GPUs', value: String(pod.activeGpus) },
+          { label: 'Rack-local load', value: percent(pod.thermal) },
+          { label: 'Scale-out load', value: percent(pod.load) },
+          { label: 'Rack capacity', value: `${pod.totalGpus} GPUs` },
+        ],
+      }
+    }
+    case 'gpu': {
+      const gpu = model.nodes
+        .flatMap((node) => node.gpus)
+        .find((candidate) => candidate.id === targetId)
+      if (!gpu) {
+        return null
+      }
+      // A GPU with no memory in use shows 'idle' for every placement metric.
+      const inUse = gpu.memoryUsedGB > 0
+      const placement = (prefix: string, zeroBasedIndex: number) =>
+        inUse ? `${prefix}${zeroBasedIndex + 1}` : 'idle'
+      return {
+        kind: 'gpu',
+        id: gpu.id,
+        heading: `GPU ${gpu.globalIndex + 1}`,
+        subheading:
+          `${rackLabel} ${gpu.domainIndex + 1} · ${nodeLabel} ${gpu.domainLocalIndex + 1} · ` +
+          `slot ${gpu.localIndex + 1}`,
+        metrics: [
+          { label: 'Stage', value: placement('P', gpu.stage) },
+          { label: 'Tensor lane', value: placement('T', gpu.tpLane) },
+          { label: 'Context shard', value: placement('C', gpu.cpShard) },
+          { label: 'Expert lane', value: placement('E', gpu.epLane) },
+          { label: 'Data replica', value: placement('D', gpu.dpReplica) },
+          { label: 'Replica group', value: placement('G', gpu.replicaGroup) },
+          { label: 'FSDP rank', value: placement('F', gpu.fsdpRank) },
+          { label: 'HBM', value: `${gpu.memoryUsedGB.toFixed(1)} / ${gpu.memoryCapacityGB.toFixed(0)} GB` },
+          { label: 'Link load', value: percent(gpu.linkLoad) },
+        ],
+      }
+    }
+    case 'node': {
+      const node = model.nodes.find((candidate) => candidate.id === targetId)
+      if (!node) {
+        return null
+      }
+      return {
+        kind: 'node',
+        id: node.id,
+        heading: `${nodeLabel} ${node.domainLocalIndex + 1}`,
+        subheading: `${rackLabel} ${node.domainIndex + 1} · ${node.activeCount}/${cluster.gpusPerNode} GPUs allocated`,
+        metrics: [
+          { label: 'Active GPUs', value: String(node.activeCount) },
+          { label: 'Rack-local load', value: percent(node.localFabricLoad) },
+          { label: 'Scale-out load', value: percent(node.interNodeLoad) },
+          { label: 'Node size', value: `${cluster.gpusPerNode} GPUs` },
+        ],
+      }
+    }
+    default: {
+      const allLinks = [...model.rowLinks, ...model.columnLinks, ...model.busLinks]
+      const link = allLinks.find((candidate) => candidate.id === targetId)
+      if (!link) {
+        return null
+      }
+      let subheading = 'Rack-local fabric segment'
+      if (link.scope === 'rack') {
+        subheading = 'Inter-rack fabric segment'
+      } else if (link.transport === 'nvlink') {
+        subheading = 'Local high-bandwidth GPU fabric'
+      }
+      return {
+        kind: 'link',
+        id: link.id,
+        heading: link.title,
+        subheading,
+        metrics: [
+          { label: 'Traffic', value: link.trafficType.toUpperCase() },
+          { label: 'Transport', value: link.transport },
+          { label: 'Utilization', value: percent(link.load) },
+          { label: 'Volume', value: `${link.volumeGB.toFixed(2)} GB` },
+        ],
+      }
+    }
+  }
+}
+/**
+ * Computes the viewport (scale and offset) that centres `bounds` inside a
+ * `width` x `height` area while leaving `padding` on every side.
+ *
+ * The scale is the largest uniform scale at which the padded bounds fit,
+ * limited to the range 0.04–32.
+ */
+export function getBoundsViewport(
+  bounds: SceneHitBounds,
+  width: number,
+  height: number,
+  padding = 26,
+): ViewportState {
+  const horizontalFit = (width - padding * 2) / bounds.width
+  const verticalFit = (height - padding * 2) / bounds.height
+  const tightestFit = Math.min(horizontalFit, verticalFit)
+  const scale = Math.min(Math.max(tightestFit, 0.04), 32)
+  // Centre the scaled bounds, then shift by the bounds' own scaled origin.
+  const x = (width - bounds.width * scale) / 2 - bounds.x * scale
+  const y = (height - bounds.height * scale) / 2 - bounds.y * scale
+  return { scale, x, y }
+}
+/**
+ * Computes the viewport that fits the entire scene (`model.width` x
+ * `model.height`, anchored at the origin) into a `width` x `height` area,
+ * using getBoundsViewport's default padding.
+ */
+export function getFitViewport(
+  model: TopologySceneModel,
+  width: number,
+  height: number,
+): ViewportState {
+  const sceneBounds: SceneHitBounds = {
+    x: 0,
+    y: 0,
+    width: model.width,
+    height: model.height,
+  }
+  return getBoundsViewport(sceneBounds, width, height)
+}
+/**
+ * Maps a rectangle from scene coordinates to screen coordinates by applying
+ * the viewport's scale and then its offset.
+ */
+export function worldToScreen(bounds: SceneHitBounds, viewport: ViewportState) {
+  const { scale, x: offsetX, y: offsetY } = viewport
+  const screenX = bounds.x * scale + offsetX
+  const screenY = bounds.y * scale + offsetY
+  return {
+    x: screenX,
+    y: screenY,
+    width: bounds.width * scale,
+    height: bounds.height * scale,
+  }
+}

src/lib/trainingClusterModel.ts ADDED Viewed

	@@ -0,0 +1,1882 @@

+export interface ModelConfig { // Model shape used by getModelBreakdown to derive parameter counts
+  architecture: 'dense' | 'moe' // 'moe' enables the expert-parameter terms (only when `moe` is also set)
+  hiddenDim: number // Hidden size; head dimension is hiddenDim / numHeads
+  numLayers: number // Total layer count (dense + MoE)
+  numHeads: number // Attention head count
+  numKVHeads: number // Key/value head count; the K/V projection width is numKVHeads * headDim
+  vocabSize: number // Vocabulary size; embedding parameters are vocabSize * hiddenDim
+  intermediateSize: number // Dense MLP width; a dense MLP counts hiddenDim * intermediateSize * 3 parameters
+  tiedEmbeddings: boolean // When true, no separate output-head parameters are counted
+  attentionProfile?: { // NOTE(review): not read in the visible part of this file — semantics to be confirmed
+    type: 'full' | 'hybrid'
+    slidingWindowSize?: number
+    globalAttentionFraction?: number
+    globalAttentionEveryN?: number
+  }
+  moe?: { // Mixture-of-experts settings, used only when architecture is 'moe'
+    numExperts: number // Experts per MoE layer
+    expertsPerToken: number // Experts counted per token when deriving active parameters
+    numDenseLayers: number // Layers counted as dense; the remaining layers are MoE layers
+    expertIntermediateSize: number // Expert MLP width; one expert counts hiddenDim * expertIntermediateSize * 3 parameters
+    activeParamsPerToken?: number // Optional override for the derived active-parameter count
+  }
+}
+export interface TrainingConfig { // Training-run settings
+  microBatchSize: number
+  seqLength: number
+  gradAccumSteps: number
+  precision: 'fp32' | 'bf16' | 'fp16' | 'fp8' // Drives the per-parameter/activation/gradient byte sizes and the peak-TFLOPs scaling
+  activationCheckpointing: boolean // Applies a 0.02 penalty in getSustainedComputeEfficiency
+  optimizer: 'adam' | 'adamw' | 'sgd' | 'muon' // Drives getOptimizerBytesPerParam
+}
+export interface GPUSpec { // Description of a GPU model
+  name: string // Matched case-insensitively for 'gb200' / 'h100' in getDefaultFabric
+  hbmCapacityGB: number
+  peakTFLOPsBF16: number // Baseline peak that getPeakTFLOPsForPrecision scales for fp32 and fp8
+  memBandwidthTBs: number
+}
+export interface ClusterConfig { // Physical cluster description
+  gpuType: GPUSpec
+  gpusPerNode: number // GPUs in each node
+  numNodes: number // Total nodes in the cluster
+  intraNodeBandwidthGBs: number
+  interNodeBandwidthGBs: number
+  nodesPerRack?: number // Nodes per rack; the topology layout treats a missing value as "all nodes in one rack"
+  rackLabel?: string // Display label for a rack; the topology scene falls back to 'rack'
+  nodeLabel?: string // Display label for a node; the topology scene falls back to 'node'
+  podLabel?: string // NOTE(review): not read in the visible code — usage to be confirmed
+}
+export interface ParallelismConfig { // Requested parallelism settings; NOTE(review): consumers are outside this chunk, field meanings below are inferred from names — confirm
+  tp: number // presumably tensor-parallel degree
+  pp: number // presumably pipeline-parallel degree
+  cp: number // presumably context-parallel degree
+  ep: number // presumably expert-parallel degree
+  distributedOptimizer: boolean
+  fsdpShardGroupSize: number
+  zeroStage: 0 | 1 | 2 | 3
+}
+export interface ClusterAnalysis { // Analysis result for a cluster configuration; `gpuMap` and `links` feed the topology scene builder
+  feasible: boolean
+  infeasibilityReason?: string
+  totalParams: number
+  activeParamsPerToken: number
+  globalBatchSizeTokens: number
+  totalGPUs: number
+  derivedParallelism: {
+    dp: number
+    replicaGroups: number
+    fsdpShardGroupSize: number
+    fsdpGroupSize: number
+    ep: number
+  }
+  memoryBreakdown: {
+    parametersGB: number
+    optimizerStatesGB: number
+    gradientsGB: number
+    activationsGB: number
+    totalGB: number
+    hbmCapacityGB: number
+    utilizationPercent: number
+  }
+  pipelineStages: {
+    stageIndex: number
+    layerRange: [number, number]
+    numLayers: number
+    memoryGB: number
+    hasEmbedding: boolean
+    hasOutputHead: boolean
+  }[]
+  communication: {
+    tp: {
+      allReducesPerLayer: number
+      messageSizeBytes: number
+      totalVolumePerStepGB: number
+      timePerStepMs: number
+      linkUtilizationPercent: number
+    }
+    pp: {
+      activationMessageSizeBytes: number
+      numP2PTransfersPerStep: number
+      totalVolumePerStepGB: number
+      timePerStepMs: number
+      usesInterNode: boolean
+    }
+    cp: {
+      collectivesPerLayer: number
+      messageSizeBytes: number
+      totalVolumePerStepGB: number
+      timePerStepMs: number
+      linkUtilizationPercent: number
+      usesInterNode: boolean
+    }
+    fsdp: {
+      collectivesPerLayer: number
+      messageSizeBytes: number
+      totalVolumePerStepGB: number
+      timePerStepMs: number
+      linkUtilizationPercent: number
+      usesInterNode: boolean
+    }
+    ep: {
+      allToAllsPerLayer: number
+      messageSizeBytes: number
+      totalVolumePerStepGB: number
+      timePerStepMs: number
+      linkUtilizationPercent: number
+      usesInterNode: boolean
+    }
+    dp: {
+      gradientVolumePerGPU_GB: number
+      allReduceTimeMs: number
+      canOverlapWithBackward: boolean
+      linkUtilizationPercent: number
+    }
+  }
+  throughput: {
+    computeTimePerStepMs: number
+    communicationTimePerStepMs: number
+    pipelineBubbleFraction: number
+    pipelineBubbleTimeMs: number
+    totalStepTimeMs: number
+    tokensPerSecond: number
+    mfu: number
+  }
+  gpuMap: { // One entry per GPU
+    globalGPUIndex: number // Cluster-wide GPU index; `links` endpoints refer to this value
+    nodeIndex: number // Index of the node hosting the GPU
+    localGPUIndex: number // Slot of the GPU within its node
+    tpGroup: number
+    tpLane: number
+    ppStage: number // Pipeline stage; the scene builder clamps it to >= 0 before picking a colour
+    cpShard: number
+    epLane: number
+    dpReplica: number
+    replicaGroup: number
+    fsdpRank: number
+    memoryUsedGB: number // The scene treats a value > 0 as "in use"
+    memoryCapacityGB: number
+    isActive: boolean
+  }[]
+  links: { // Per-GPU-pair traffic entries
+    fromGPU: number // globalGPUIndex of one endpoint
+    toGPU: number // globalGPUIndex of the other endpoint
+    type: 'nvlink' | 'infiniband' // Transport carrying the traffic
+    trafficType: 'tp' | 'pp' | 'cp' | 'fsdp' | 'ep' | 'dp'
+    volumeGB: number
+    utilizationPercent: number // Percentage; the scene builder divides by 100 to obtain a fractional load
+  }[]
+}
+type LayerDistribution = { // Layer assignment of one pipeline stage, as produced by distributeLayers
+  stageIndex: number // Zero-based stage position
+  startLayer: number // First layer index of the stage
+  endLayer: number // Last layer index of the stage, inclusive (startLayer + numLayers - 1)
+  numLayers: number // Number of layers assigned to the stage
+}
+type StageMemory = { // Memory figures in GB; NOTE(review): producer is outside this chunk — presumably per pipeline stage, confirm
+  parametersGB: number
+  optimizerStatesGB: number
+  gradientsGB: number
+  activationsGB: number
+  totalGB: number
+}
+type StageParameterCount = { // Parameter tallies; NOTE(review): producer is outside this chunk — presumably per pipeline stage, confirm
+  stageParams: number
+  sharedParams: number
+  expertParams: number
+  denseLayers: number
+  moeLayers: number
+  hasEmbedding: boolean
+  hasOutputHead: boolean
+}
+type PlacementEntry = { // Same fields as a ClusterAnalysis.gpuMap entry minus the two memory fields; NOTE(review): producer is outside this chunk
+  globalGPUIndex: number
+  nodeIndex: number
+  localGPUIndex: number
+  tpGroup: number
+  tpLane: number
+  ppStage: number
+  cpShard: number
+  epLane: number
+  dpReplica: number
+  replicaGroup: number
+  fsdpRank: number
+  isActive: boolean
+}
+type DerivedParallelism = { // Derived parallelism figures; NOTE(review): producer is outside this chunk — field meanings to be confirmed there
+  modelParallelSize: number
+  dp: number
+  replicaGroups: number
+  fsdpGroupSize: number
+  fsdpDataParallelDegree: number
+}
+type ModelBreakdown = ReturnType<typeof getModelBreakdown> // Alias for whatever getModelBreakdown returns, so the two cannot drift apart
+type RingCommStats = { // Communication statistics; NOTE(review): producer is outside this chunk — presumably for ring collectives, confirm
+  volumeBytesPerGpu: number
+  totalVolumeBytes: number
+  timePerStepMs: number
+  linkUtilizationPercent: number
+  usesInterNode: boolean
+}
+const BYTES_PER_GB = 1e9 // Decimal gigabyte (10^9 bytes); divisor used by bytesToGB
+const TP_ALL_REDUCES_PER_LAYER = 4 // NOTE(review): consumers of the four per-layer counts below are outside this chunk
+const CP_COLLECTIVES_PER_LAYER = 2
+const FSDP_COLLECTIVES_PER_LAYER = 4
+const EP_ALL_TO_ALLS_PER_LAYER = 2
+const DEFAULT_BF16_EFFICIENCY = 0.56 // Baseline that getSustainedComputeEfficiency adjusts and clamps to 0.3–0.62
+/** Restricts `value` to the inclusive range [`min`, `max`]. */
+const clamp = (value: number, min: number, max: number) => {
+  const raisedToMin = Math.max(value, min)
+  return Math.min(raisedToMin, max)
+}
+/** Converts a byte count to gigabytes using BYTES_PER_GB. */
+const bytesToGB = (bytes: number) => {
+  return bytes / BYTES_PER_GB
+}
+/** Rounds `value` to two decimal places. */
+const round2 = (value: number) => {
+  const hundredths = Math.round(value * 100)
+  return hundredths / 100
+}
+/** Bytes stored per parameter: 4 for fp32, 1 for fp8, 2 for every other precision. */
+const getParameterBytes = (precision: TrainingConfig['precision']) => {
+  if (precision === 'fp32') {
+    return 4
+  }
+  if (precision === 'fp8') {
+    return 1
+  }
+  return 2
+}
+/** Bytes per activation element: 4 for fp32, 2 for every other precision (fp8 included). */
+const getActivationBytes = (precision: TrainingConfig['precision']) => {
+  if (precision === 'fp32') {
+    return 4
+  }
+  return 2
+}
+/** Bytes per gradient element: 4 for fp32, 2 for every other precision (fp8 included). */
+const getGradientBytes = (precision: TrainingConfig['precision']) => {
+  if (precision === 'fp32') {
+    return 4
+  }
+  return 2
+}
+/**
+ * Optimizer-state bytes modelled per parameter.
+ *
+ * SGD is 4 and Muon is 8 regardless of precision; the Adam family is 8 under
+ * fp32 and 12 under every other precision.
+ */
+const getOptimizerBytesPerParam = (
+  optimizer: TrainingConfig['optimizer'],
+  precision: TrainingConfig['precision'],
+) => {
+  switch (optimizer) {
+    case 'sgd':
+      return 4
+    case 'muon':
+      // Muon keeps less optimizer state than Adam-family optimizers in
+      // practice; it is modelled as 8 bytes per parameter of extra state on
+      // top of bf16 weights.
+      return 8
+    default: {
+      const isFullPrecision = precision === 'fp32'
+      return isFullPrecision ? 8 : 12
+    }
+  }
+}
+/**
+ * Peak TFLOPs for a training precision, derived from the GPU's BF16 peak:
+ * a quarter of it for fp32, double for fp8, unchanged otherwise.
+ */
+const getPeakTFLOPsForPrecision = (gpu: GPUSpec, precision: TrainingConfig['precision']) => {
+  const bf16Peak = gpu.peakTFLOPsBF16
+  if (precision === 'fp32') {
+    return bf16Peak * 0.25
+  }
+  if (precision === 'fp8') {
+    return bf16Peak * 2
+  }
+  return bf16Peak
+}
+/**
+ * Estimates the sustained fraction of peak compute for a training setup.
+ *
+ * Starts from DEFAULT_BF16_EFFICIENCY, subtracts 0.02 when activation
+ * checkpointing is on and 0.08 for fp32, adds 0.02 for the Muon optimizer,
+ * and limits the result to the range 0.3–0.62.
+ */
+const getSustainedComputeEfficiency = (training: TrainingConfig) => {
+  let checkpointPenalty = 0
+  if (training.activationCheckpointing) {
+    checkpointPenalty = 0.02
+  }
+  let fp32Penalty = 0
+  if (training.precision === 'fp32') {
+    fp32Penalty = 0.08
+  }
+  let muonBoost = 0
+  if (training.optimizer === 'muon') {
+    muonBoost = 0.02
+  }
+  const estimate = DEFAULT_BF16_EFFICIENCY - checkpointPenalty - fp32Penalty + muonBoost
+  return clamp(estimate, 0.3, 0.62)
+}
+/**
+ * Splits `numLayers` layers into `pp` contiguous pipeline stages.
+ *
+ * Every stage receives floor(numLayers / pp) layers and the first
+ * `numLayers % pp` stages receive one extra. `endLayer` is inclusive.
+ */
+const distributeLayers = (numLayers: number, pp: number): LayerDistribution[] => {
+  const evenShare = Math.floor(numLayers / pp)
+  const leftover = numLayers % pp
+  // Index of the first layer not yet assigned to a stage.
+  let cursor = 0
+  return Array.from({ length: pp }, (_unused, stageIndex) => {
+    const layersInStage = stageIndex < leftover ? evenShare + 1 : evenShare
+    const firstLayer = cursor
+    cursor += layersInStage
+    return {
+      stageIndex,
+      startLayer: firstLayer,
+      endLayer: firstLayer + layersInStage - 1,
+      numLayers: layersInStage,
+    }
+  })
+}
+/**
+ * Returns default intra-node and inter-node bandwidths for a GPU, chosen by a
+ * case-insensitive substring match on its name: 'gb200' is checked first,
+ * then 'h100'; any other name gets the generic fallback (300 / 50).
+ */
+const getDefaultFabric = (gpu: GPUSpec) => {
+  const lowered = gpu.name.toLowerCase()
+  // Order matters: the first matching marker wins.
+  const knownFabrics = [
+    { marker: 'gb200', intraNode: 900, interNode: 100 },
+    { marker: 'h100', intraNode: 450, interNode: 100 },
+  ]
+  const match = knownFabrics.find((fabric) => lowered.includes(fabric.marker))
+  if (match) {
+    return {
+      intraNodeBandwidthGBs: match.intraNode,
+      interNodeBandwidthGBs: match.interNode,
+    }
+  }
+  return {
+    intraNodeBandwidthGBs: 300,
+    interNodeBandwidthGBs: 50,
+  }
+}
+/**
+ * Derives parameter counts for a model configuration.
+ *
+ * Returns per-layer building blocks (attention, dense MLP, norms, experts), the
+ * dense/MoE layer split, and the aggregate `sharedParams`, `totalParams` and
+ * `activeParamsPerToken` figures.
+ *
+ * For MoE models `moe.numDenseLayers` is clamped to [0, numLayers]. Without the
+ * clamp an out-of-range value produced a negative `moeLayerCount`, which
+ * subtracted expert parameters from the totals and disagreed with
+ * getStageLayerMix, which already caps dense layers at the stage boundaries.
+ *
+ * An explicit `moe.activeParamsPerToken` overrides the derived active count.
+ */
+const getModelBreakdown = (model: ModelConfig) => {
+  // Only treat the model as MoE when both the architecture flag and metadata exist.
+  const moeConfig = model.architecture === 'moe' && model.moe ? model.moe : undefined
+  const headDim = model.hiddenDim / model.numHeads
+  const embeddingParams = model.vocabSize * model.hiddenDim
+  const kvProjectionDim = model.numKVHeads * headDim
+  // Two hiddenDim-wide projections (presumably Q and output) plus two
+  // kvProjectionDim-wide projections (presumably K and V).
+  const perLayerAttentionParams =
+    model.hiddenDim * (model.hiddenDim + 2 * kvProjectionDim + model.hiddenDim)
+  // Three hiddenDim x intermediateSize matrices (presumably a gated MLP).
+  const perLayerDenseMlpParams = model.hiddenDim * model.intermediateSize * 3
+  const perLayerNormParams = model.hiddenDim * 2
+  const finalNormParams = model.hiddenDim
+  // Tied embeddings reuse the embedding matrix, so the output head adds nothing.
+  const outputHeadParams = model.tiedEmbeddings ? 0 : embeddingParams
+  const perExpertParams = moeConfig
+    ? model.hiddenDim * moeConfig.expertIntermediateSize * 3
+    : 0
+  const totalExpertParamsPerLayer = moeConfig ? perExpertParams * moeConfig.numExperts : 0
+  // Clamp so that moeLayerCount can never become negative (or exceed numLayers).
+  const denseLayerCount = moeConfig
+    ? Math.min(Math.max(moeConfig.numDenseLayers, 0), model.numLayers)
+    : model.numLayers
+  const moeLayerCount = model.numLayers - denseLayerCount
+  const sharedDenseLayerParams =
+    perLayerAttentionParams + perLayerDenseMlpParams + perLayerNormParams
+  // MoE layers keep attention and norms as shared weights; the MLP is replaced by experts.
+  const sharedMoeLayerParams = perLayerAttentionParams + perLayerNormParams
+  const sharedParams =
+    embeddingParams +
+    denseLayerCount * sharedDenseLayerParams +
+    moeLayerCount * sharedMoeLayerParams +
+    finalNormParams +
+    outputHeadParams
+  const totalParams = sharedParams + moeLayerCount * totalExpertParamsPerLayer
+  // Active parameters count only the experts routed per token in each MoE layer.
+  const derivedActiveParams = moeConfig
+    ? embeddingParams +
+      denseLayerCount * sharedDenseLayerParams +
+      moeLayerCount * (sharedMoeLayerParams + moeConfig.expertsPerToken * perExpertParams) +
+      finalNormParams +
+      outputHeadParams
+    : totalParams
+  const activeParamsPerToken = moeConfig?.activeParamsPerToken ?? derivedActiveParams
+  const perLayerTotalParams =
+    model.architecture === 'moe'
+      ? sharedMoeLayerParams + totalExpertParamsPerLayer
+      : sharedDenseLayerParams
+  return {
+    headDim,
+    kvProjectionDim,
+    embeddingParams,
+    perLayerAttentionParams,
+    perLayerDenseMlpParams,
+    perLayerNormParams,
+    perExpertParams,
+    totalExpertParamsPerLayer,
+    sharedDenseLayerParams,
+    sharedMoeLayerParams,
+    denseLayerCount,
+    moeLayerCount,
+    sharedParams,
+    perLayerTotalParams,
+    finalNormParams,
+    outputHeadParams,
+    totalParams,
+    activeParamsPerToken,
+  }
+}
+/**
+ * Number of micro-batches whose activations are counted as live at once.
+ *
+ * Without pipeline parallelism (pp <= 1) this is 1. Otherwise it is the smaller
+ * of the gradient-accumulation step count and the pipeline depth, never below 1.
+ */
+const getConcurrentMicroBatches = (
+  training: TrainingConfig,
+  parallelism: ParallelismConfig,
+) => {
+  const pipelineDepth = parallelism.pp
+  return pipelineDepth <= 1
+    ? 1
+    : Math.max(1, Math.min(training.gradAccumSteps, pipelineDepth))
+}
+/**
+ * Returns a factor in (at most) [windowShare, 1] describing how much of full
+ * attention the model's attention profile uses at the given sequence length.
+ *
+ * A missing profile or a 'full' profile yields 1. Otherwise the result blends a
+ * global-attention share with a sliding-window share (window / seqLength,
+ * clamped to [0, 1]). The global share comes from `globalAttentionFraction`,
+ * else 1 / `globalAttentionEveryN`, else 0.25.
+ */
+const getAttentionMultiplier = (model: ModelConfig, seqLength: number) => {
+  const profile = model.attentionProfile
+  if (!profile || profile.type === 'full') {
+    return 1
+  }
+  const windowSize = profile.slidingWindowSize
+  const windowShare = windowSize != null ? clamp(windowSize / seqLength, 0, 1) : 1
+  const everyN = profile.globalAttentionEveryN
+  const fallbackGlobalShare = everyN != null ? 1 / everyN : 0.25
+  const globalShare = profile.globalAttentionFraction ?? fallbackGlobalShare
+  const blended = globalShare + (1 - globalShare) * windowShare
+  return clamp(blended, windowShare, 1)
+}
+/**
+ * Counts how many of a pipeline stage's layers are dense versus MoE.
+ *
+ * Non-MoE models (or MoE models without expert metadata) are entirely dense.
+ * For MoE models, layers with index below `moe.numDenseLayers` are dense; the
+ * dense count for the stage is the overlap of that prefix with the stage's
+ * inclusive [startLayer, endLayer] range.
+ */
+const getStageLayerMix = (stage: LayerDistribution, model: ModelConfig) => {
+  const moeConfig = model.architecture === 'moe' ? model.moe : undefined
+  if (!moeConfig) {
+    return { denseLayers: stage.numLayers, moeLayers: 0 }
+  }
+  const lastDenseLayer = moeConfig.numDenseLayers - 1
+  const overlapWithDensePrefix = Math.max(
+    0,
+    Math.min(stage.endLayer, lastDenseLayer) - stage.startLayer + 1,
+  )
+  // The dense prefix ends before this stage starts: nothing dense here.
+  const denseLayers = lastDenseLayer < stage.startLayer ? 0 : overlapWithDensePrefix
+  return { denseLayers, moeLayers: stage.numLayers - denseLayers }
+}
+/**
+ * Totals the parameters held by one pipeline stage.
+ *
+ * Shared parameters cover the stage's dense and MoE layers; the first stage
+ * additionally holds the embedding and the last stage holds the final norm and
+ * output head. Expert parameters are counted separately for the MoE layers.
+ */
+const getStageParameterCount = (
+  stage: LayerDistribution,
+  modelBreakdown: ModelBreakdown,
+  parallelism: ParallelismConfig,
+  model: ModelConfig,
+): StageParameterCount => {
+  const { denseLayers, moeLayers } = getStageLayerMix(stage, model)
+  const hasEmbedding = stage.stageIndex === 0
+  const hasOutputHead = stage.stageIndex === parallelism.pp - 1
+  const layerSharedParams =
+    denseLayers * modelBreakdown.sharedDenseLayerParams +
+    moeLayers * modelBreakdown.sharedMoeLayerParams
+  const withEmbedding = hasEmbedding
+    ? layerSharedParams + modelBreakdown.embeddingParams
+    : layerSharedParams
+  const sharedParams = hasOutputHead
+    ? withEmbedding + (modelBreakdown.finalNormParams + modelBreakdown.outputHeadParams)
+    : withEmbedding
+  const expertParams = moeLayers * modelBreakdown.totalExpertParamsPerLayer
+  return {
+    stageParams: sharedParams + expertParams,
+    sharedParams,
+    expertParams,
+    denseLayers,
+    moeLayers,
+    hasEmbedding,
+    hasOutputHead,
+  }
+}
+/**
+ * Estimates activation memory (bytes) that one layer keeps for one micro-batch.
+ *
+ * The estimate combines a hidden-state term (divided by TP when TP > 1) with an
+ * intermediate term (QKV plus either the dense MLP or the routed-expert MLP,
+ * divided by TP). Activation checkpointing applies weights 2 / 0.25 to the two
+ * terms; otherwise the weights are 6 / 2.
+ */
+const getActivationMemoryBytesPerLayer = ({
+  model,
+  training,
+  parallelism,
+  isMoeLayer,
+}: {
+  model: ModelConfig
+  training: TrainingConfig
+  parallelism: ParallelismConfig
+  isMoeLayer: boolean
+}) => {
+  const bytesPerActivation = getActivationBytes(training.precision)
+  // Context parallelism splits the sequence, so each shard sees seqLength / cp tokens.
+  const tokensPerShard = training.microBatchSize * (training.seqLength / parallelism.cp)
+  const kvWidth = model.numKVHeads * (model.hiddenDim / model.numHeads)
+  // The residual stream is assumed to be sequence-parallel across the TP group
+  // (Megatron-style), so it is divided by TP whenever TP is enabled.
+  const residualShardFactor = parallelism.tp > 1 ? parallelism.tp : 1
+  const hiddenStateBytes =
+    (tokensPerShard * model.hiddenDim * bytesPerActivation) / residualShardFactor
+  const attentionScale = getAttentionMultiplier(model, training.seqLength)
+  const qkvBytes =
+    tokensPerShard * (model.hiddenDim + 2 * kvWidth) * bytesPerActivation * attentionScale
+  let mlpBytes: number
+  if (isMoeLayer) {
+    // Routed-expert activations are spread over the EP group; without expert
+    // metadata the MoE MLP contributes nothing.
+    mlpBytes = model.moe
+      ? (tokensPerShard *
+          model.moe.expertIntermediateSize *
+          bytesPerActivation *
+          model.moe.expertsPerToken *
+          2) /
+        Math.max(parallelism.ep, 1)
+      : 0
+  } else {
+    mlpBytes = tokensPerShard * model.intermediateSize * bytesPerActivation * 2
+  }
+  const intermediateBytes = (qkvBytes + mlpBytes) / Math.max(parallelism.tp, 1)
+  const [hiddenWeight, intermediateWeight] = training.activationCheckpointing
+    ? [2, 0.25]
+    : [6, 2]
+  return hiddenStateBytes * hiddenWeight + intermediateBytes * intermediateWeight
+}
+/**
+ * Estimates per-GPU memory (in GB) for one pipeline stage, split into
+ * parameters, optimizer states, gradients and activations.
+ *
+ * Shared parameters are divided by TP and expert parameters by TP x EP. Each
+ * category is then divided by a shard factor selected from `zeroStage`:
+ * parameters shard at stage >= 3, gradients at >= 2, optimizer states at >= 1.
+ * With an FSDP shard group (> 1) the factor is the FSDP data-parallel degree;
+ * otherwise gradients use `dp` and optimizer states use `dp` only when
+ * `distributedOptimizer` is set.
+ */
+const getStageMemory = (
+  stageParams: StageParameterCount,
+  model: ModelConfig,
+  training: TrainingConfig,
+  parallelism: ParallelismConfig,
+  derivedParallelism: DerivedParallelism,
+) => {
+  const bytesPerParameter = getParameterBytes(training.precision)
+  const bytesPerGradient = getGradientBytes(training.precision)
+  const bytesPerOptimizerState = getOptimizerBytesPerParam(training.optimizer, training.precision)
+  const usesFsdpGroup = parallelism.fsdpShardGroupSize > 1
+  const fsdpFactor = usesFsdpGroup ? derivedParallelism.fsdpDataParallelDegree : 1
+  const distributedOptimizerFactor = parallelism.distributedOptimizer
+    ? derivedParallelism.dp
+    : 1
+  let parameterFactor = 1
+  let optimizerFactor = 1
+  let gradientFactor = 1
+  if (parallelism.zeroStage >= 3) {
+    parameterFactor = fsdpFactor
+  }
+  if (parallelism.zeroStage >= 2) {
+    gradientFactor = usesFsdpGroup ? fsdpFactor : derivedParallelism.dp
+  }
+  if (parallelism.zeroStage >= 1) {
+    optimizerFactor = usesFsdpGroup ? fsdpFactor : distributedOptimizerFactor
+  }
+  const localSharedParams = stageParams.sharedParams / Math.max(parallelism.tp, 1)
+  const localExpertParams =
+    stageParams.expertParams / Math.max(parallelism.tp * parallelism.ep, 1)
+  // Bytes for the local shared + expert parameters after applying a shard factor.
+  const shardedBytes = (shardFactor: number, bytesPerParam: number) =>
+    (localSharedParams / shardFactor + localExpertParams / shardFactor) * bytesPerParam
+  const parameterMemoryBytes = shardedBytes(parameterFactor, bytesPerParameter)
+  const optimizerMemoryBytes = shardedBytes(optimizerFactor, bytesPerOptimizerState)
+  const gradientMemoryBytes = shardedBytes(gradientFactor, bytesPerGradient)
+  const densePerLayerBytes = getActivationMemoryBytesPerLayer({
+    model,
+    training,
+    parallelism,
+    isMoeLayer: false,
+  })
+  const moePerLayerBytes = getActivationMemoryBytesPerLayer({
+    model,
+    training,
+    parallelism,
+    isMoeLayer: true,
+  })
+  const liveMicroBatches = getConcurrentMicroBatches(training, parallelism)
+  const perMicroBatchBytes =
+    densePerLayerBytes * stageParams.denseLayers + moePerLayerBytes * stageParams.moeLayers
+  let activationMemoryBytes = perMicroBatchBytes * liveMicroBatches
+  // With checkpointing, add headroom of 1.5x the larger per-layer figure for
+  // any stage that actually holds parameters.
+  if (training.activationCheckpointing && stageParams.stageParams > 0) {
+    activationMemoryBytes += Math.max(densePerLayerBytes, moePerLayerBytes) * 1.5
+  }
+  const totalBytes =
+    parameterMemoryBytes + optimizerMemoryBytes + gradientMemoryBytes + activationMemoryBytes
+  return {
+    parametersGB: bytesToGB(parameterMemoryBytes),
+    optimizerStatesGB: bytesToGB(optimizerMemoryBytes),
+    gradientsGB: bytesToGB(gradientMemoryBytes),
+    activationsGB: bytesToGB(activationMemoryBytes),
+    totalGB: bytesToGB(totalBytes),
+  }
+}
+/**
+ * Builds per-stage parameter counts and memory estimates for the whole model.
+ *
+ * Returns the model breakdown, the layer-to-stage distribution, and two maps
+ * keyed by stage index: one with memory estimates, one with parameter counts.
+ */
+const getStageMemoryMap = (
+  model: ModelConfig,
+  training: TrainingConfig,
+  parallelism: ParallelismConfig,
+  derivedParallelism: DerivedParallelism,
+) => {
+  const modelBreakdown = getModelBreakdown(model)
+  const layerDistribution = distributeLayers(model.numLayers, parallelism.pp)
+  const stageMemory = new Map<number, StageMemory>()
+  const stageParameters = new Map<number, StageParameterCount>()
+  layerDistribution.forEach((stage) => {
+    const counts = getStageParameterCount(stage, modelBreakdown, parallelism, model)
+    const memory = getStageMemory(counts, model, training, parallelism, derivedParallelism)
+    stageParameters.set(stage.stageIndex, counts)
+    stageMemory.set(stage.stageIndex, memory)
+  })
+  return { modelBreakdown, layerDistribution, stageMemory, stageParameters }
+}
+/**
+ * Assigns every GPU in the cluster a position in the parallelism grid.
+ *
+ * GPUs are handed out in nested order: replica group, FSDP rank, pipeline
+ * stage, CP shard, EP lane, TP lane (innermost). Before each CP shard the
+ * cursor moves to the next node if the shard's EP x TP GPUs would not fit on
+ * the current one. An entry is active while its global index is below
+ * `requiredGPUs`. Any cluster slots left over are appended as inactive entries
+ * with every grid coordinate set to -1.
+ */
+const buildPlacement = (
+  cluster: ClusterConfig,
+  parallelism: ParallelismConfig,
+  derivedParallelism: DerivedParallelism,
+  requiredGPUs: number,
+) => {
+  const clusterSize = cluster.gpusPerNode * cluster.numNodes
+  const entries: PlacementEntry[] = []
+  // Cursor state: current node, next free slot on it, and next global GPU id.
+  let node = 0
+  let slot = 0
+  let nextGlobalIndex = 0
+  for (let group = 0; group < derivedParallelism.replicaGroups; group += 1) {
+    for (let rank = 0; rank < derivedParallelism.fsdpDataParallelDegree; rank += 1) {
+      const replica = group * derivedParallelism.fsdpDataParallelDegree + rank
+      for (let stage = 0; stage < parallelism.pp; stage += 1) {
+        for (let shard = 0; shard < parallelism.cp; shard += 1) {
+          // Keep each EP x TP block on a single node.
+          if (slot + parallelism.ep * parallelism.tp > cluster.gpusPerNode) {
+            node += 1
+            slot = 0
+          }
+          const firstTpGroup =
+            ((replica * parallelism.pp + stage) * parallelism.cp + shard) * parallelism.ep
+          for (let expertLane = 0; expertLane < parallelism.ep; expertLane += 1) {
+            for (let tensorLane = 0; tensorLane < parallelism.tp; tensorLane += 1) {
+              entries.push({
+                globalGPUIndex: nextGlobalIndex,
+                nodeIndex: node,
+                localGPUIndex: slot,
+                tpGroup: firstTpGroup + expertLane,
+                tpLane: tensorLane,
+                ppStage: stage,
+                cpShard: shard,
+                epLane: expertLane,
+                dpReplica: replica,
+                replicaGroup: group,
+                fsdpRank: rank,
+                isActive: nextGlobalIndex < requiredGPUs,
+              })
+              nextGlobalIndex += 1
+              slot += 1
+            }
+          }
+        }
+      }
+    }
+  }
+  // Grid coordinates shared by every GPU that received no assignment.
+  const unassigned = {
+    tpGroup: -1,
+    tpLane: -1,
+    ppStage: -1,
+    cpShard: -1,
+    epLane: -1,
+    dpReplica: -1,
+    replicaGroup: -1,
+    fsdpRank: -1,
+    isActive: false,
+  }
+  while (entries.length < clusterSize) {
+    if (slot >= cluster.gpusPerNode) {
+      node += 1
+      slot = 0
+    }
+    entries.push({
+      globalGPUIndex: nextGlobalIndex,
+      nodeIndex: node,
+      localGPUIndex: slot,
+      ...unassigned,
+    })
+    nextGlobalIndex += 1
+    slot += 1
+  }
+  return entries
+}
+/**
+ * Finds the first placement entry matching every supplied coordinate filter.
+ *
+ * A filter that is omitted (or null/undefined) matches any value. Returns
+ * `undefined` when no entry satisfies all of the provided filters.
+ */
+const getPlacementEntry = (
+  placement: PlacementEntry[],
+  filters: Partial<
+    Pick<
+      PlacementEntry,
+      'dpReplica' | 'replicaGroup' | 'fsdpRank' | 'ppStage' | 'cpShard' | 'epLane' | 'tpLane'
+    >
+  >,
+) => {
+  // Coordinates are checked in this fixed order; the first mismatch rejects the entry.
+  const coordinates = [
+    'dpReplica',
+    'replicaGroup',
+    'fsdpRank',
+    'ppStage',
+    'cpShard',
+    'epLane',
+    'tpLane',
+  ] as const
+  return placement.find((entry) =>
+    coordinates.every(
+      (coordinate) => filters[coordinate] == null || entry[coordinate] === filters[coordinate],
+    ),
+  )
+}
+/**
+ * Derives the data-parallel layout implied by the cluster size and the
+ * configured TP / PP / CP / EP degrees.
+ *
+ * Returns null when the model-parallel size is not positive, does not divide
+ * the world size, or when the FSDP group size is not a multiple of the
+ * model-parallel size or does not divide the world size. When
+ * `fsdpShardGroupSize` is 1 or less, the FSDP group defaults to a single
+ * model-parallel replica.
+ */
+const getDerivedParallelism = (
+  cluster: ClusterConfig,
+  parallelism: ParallelismConfig,
+): DerivedParallelism | null => {
+  const worldSize = cluster.gpusPerNode * cluster.numNodes
+  const modelParallelSize =
+    parallelism.tp * parallelism.pp * parallelism.cp * parallelism.ep
+  if (modelParallelSize <= 0) {
+    return null
+  }
+  if (worldSize % modelParallelSize !== 0) {
+    return null
+  }
+  const requestedGroupSize = parallelism.fsdpShardGroupSize
+  const fsdpGroupSize = requestedGroupSize > 1 ? requestedGroupSize : modelParallelSize
+  const groupHoldsWholeReplicas = fsdpGroupSize % modelParallelSize === 0
+  const groupsTileCluster = worldSize % fsdpGroupSize === 0
+  if (!groupHoldsWholeReplicas || !groupsTileCluster) {
+    return null
+  }
+  return {
+    modelParallelSize,
+    dp: worldSize / modelParallelSize,
+    replicaGroups: worldSize / fsdpGroupSize,
+    fsdpGroupSize,
+    fsdpDataParallelDegree: fsdpGroupSize / modelParallelSize,
+  }
+}
+/**
+ * Chooses the bandwidth tier for a collective among the given GPUs.
+ *
+ * If the members sit on more than one node the inter-node bandwidth applies and
+ * `usesInterNode` is true; otherwise (including zero or one member) the
+ * intra-node bandwidth applies.
+ */
+const getMaxBandwidthForCollective = (
+  members: PlacementEntry[],
+  cluster: ClusterConfig,
+) => {
+  const spansNodes =
+    members.length > 1 && new Set(members.map((member) => member.nodeIndex)).size > 1
+  return {
+    bandwidthGBs: spansNodes ? cluster.interNodeBandwidthGBs : cluster.intraNodeBandwidthGBs,
+    usesInterNode: spansNodes,
+  }
+}
+/**
+ * Computes volume, time and link utilization for a ring collective.
+ *
+ * Each collective moves 2 * (groupWidth - 1) / groupWidth * messageBytes per
+ * GPU. Per-GPU volume scales by `collectiveCount`; total volume additionally by
+ * group width and group count. Time is per-GPU volume over the bandwidth tier
+ * chosen from `membersForBandwidth`; utilization compares that volume with what
+ * the link could carry in `totalStepTimeMs`, clamped to [0, 100].
+ *
+ * Returns all zeros when the group has a single member or nothing is sent.
+ */
+const getRingCommStats = ({
+  groupCount,
+  groupWidth,
+  messageBytes,
+  collectiveCount,
+  membersForBandwidth,
+  cluster,
+  totalStepTimeMs,
+}: {
+  groupCount: number
+  groupWidth: number
+  messageBytes: number
+  collectiveCount: number
+  membersForBandwidth: PlacementEntry[]
+  cluster: ClusterConfig
+  totalStepTimeMs: number
+}): RingCommStats => {
+  const nothingToSend = groupWidth <= 1 || collectiveCount <= 0 || messageBytes <= 0
+  if (nothingToSend) {
+    return {
+      volumeBytesPerGpu: 0,
+      totalVolumeBytes: 0,
+      timePerStepMs: 0,
+      linkUtilizationPercent: 0,
+      usesInterNode: false,
+    }
+  }
+  const bytesPerCollective = (2 * (groupWidth - 1) * messageBytes) / groupWidth
+  const volumeBytesPerGpu = bytesPerCollective * collectiveCount
+  const totalVolumeBytes = volumeBytesPerGpu * groupWidth * groupCount
+  const link = getMaxBandwidthForCollective(membersForBandwidth, cluster)
+  const volumeGBPerGpu = bytesToGB(volumeBytesPerGpu)
+  const timePerStepMs = (volumeGBPerGpu / link.bandwidthGBs) * 1000
+  let linkUtilizationPercent = 0
+  if (totalStepTimeMs > 0) {
+    const stepSeconds = totalStepTimeMs / 1000
+    linkUtilizationPercent = clamp(
+      (volumeGBPerGpu / (link.bandwidthGBs * stepSeconds)) * 100,
+      0,
+      100,
+    )
+  }
+  return {
+    volumeBytesPerGpu,
+    totalVolumeBytes,
+    timePerStepMs,
+    linkUtilizationPercent,
+    usesInterNode: link.usesInterNode,
+  }
+}
+export function analyzeCluster(
+  model: ModelConfig,
+  training: TrainingConfig,
+  cluster: ClusterConfig,
+  parallelism: ParallelismConfig,
+): ClusterAnalysis {
+  const totalGPUs = cluster.gpusPerNode * cluster.numNodes
+  const derivedParallelism = getDerivedParallelism(cluster, parallelism)
+  const globalBatchSizeTokens =
+    training.microBatchSize *
+    training.seqLength *
+    training.gradAccumSteps *
+    (derivedParallelism?.dp ?? 0)
+  const emptyGpuMap = Array.from({ length: totalGPUs }, (_, globalGPUIndex) => ({
+    globalGPUIndex,
+    nodeIndex: Math.floor(globalGPUIndex / cluster.gpusPerNode),
+    localGPUIndex: globalGPUIndex % cluster.gpusPerNode,
+    tpGroup: -1,
+    tpLane: -1,
+    ppStage: -1,
+    cpShard: -1,
+    epLane: -1,
+    dpReplica: -1,
+    replicaGroup: -1,
+    fsdpRank: -1,
+    memoryUsedGB: 0,
+    memoryCapacityGB: cluster.gpuType.hbmCapacityGB,
+    isActive: false,
+  }))
+  const emptyAnalysis = (): ClusterAnalysis => ({
+    feasible: false,
+    infeasibilityReason: 'Invalid configuration',
+    totalParams: 0,
+    activeParamsPerToken: 0,
+    globalBatchSizeTokens,
+    totalGPUs,
+    derivedParallelism: {
+      dp: derivedParallelism?.dp ?? 0,
+      replicaGroups: derivedParallelism?.replicaGroups ?? 0,
+      fsdpShardGroupSize: parallelism.fsdpShardGroupSize,
+      fsdpGroupSize: derivedParallelism?.fsdpGroupSize ?? 0,
+      ep: parallelism.ep,
+    },
+    memoryBreakdown: {
+      parametersGB: 0,
+      optimizerStatesGB: 0,
+      gradientsGB: 0,
+      activationsGB: 0,
+      totalGB: 0,
+      hbmCapacityGB: cluster.gpuType.hbmCapacityGB,
+      utilizationPercent: 0,
+    },
+    pipelineStages: [],
+    communication: {
+      tp: {
+        allReducesPerLayer: TP_ALL_REDUCES_PER_LAYER,
+        messageSizeBytes: 0,
+        totalVolumePerStepGB: 0,
+        timePerStepMs: 0,
+        linkUtilizationPercent: 0,
+      },
+      pp: {
+        activationMessageSizeBytes: 0,
+        numP2PTransfersPerStep: 0,
+        totalVolumePerStepGB: 0,
+        timePerStepMs: 0,
+        usesInterNode: false,
+      },
+      cp: {
+        collectivesPerLayer: CP_COLLECTIVES_PER_LAYER,
+        messageSizeBytes: 0,
+        totalVolumePerStepGB: 0,
+        timePerStepMs: 0,
+        linkUtilizationPercent: 0,
+        usesInterNode: false,
+      },
+      fsdp: {
+        collectivesPerLayer: FSDP_COLLECTIVES_PER_LAYER,
+        messageSizeBytes: 0,
+        totalVolumePerStepGB: 0,
+        timePerStepMs: 0,
+        linkUtilizationPercent: 0,
+        usesInterNode: false,
+      },
+      ep: {
+        allToAllsPerLayer: EP_ALL_TO_ALLS_PER_LAYER,
+        messageSizeBytes: 0,
+        totalVolumePerStepGB: 0,
+        timePerStepMs: 0,
+        linkUtilizationPercent: 0,
+        usesInterNode: false,
+      },
+      dp: {
+        gradientVolumePerGPU_GB: 0,
+        allReduceTimeMs: 0,
+        canOverlapWithBackward: false,
+        linkUtilizationPercent: 0,
+      },
+    },
+    throughput: {
+      computeTimePerStepMs: 0,
+      communicationTimePerStepMs: 0,
+      pipelineBubbleFraction: 0,
+      pipelineBubbleTimeMs: 0,
+      totalStepTimeMs: 0,
+      tokensPerSecond: 0,
+      mfu: 0,
+    },
+    gpuMap: emptyGpuMap,
+    links: [],
+  })
+  if (
+    training.microBatchSize <= 0 ||
+    training.seqLength <= 0 ||
+    training.gradAccumSteps <= 0 ||
+    parallelism.tp <= 0 ||
+    parallelism.pp <= 0 ||
+    parallelism.cp <= 0 ||
+    parallelism.ep <= 0
+  ) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason = 'Batch sizes and parallelism degrees must all be positive.'
+    return analysis
+  }
+  if (parallelism.tp * parallelism.ep > cluster.gpusPerNode) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `TP × EP requires ${parallelism.tp * parallelism.ep} GPUs per node, but nodes only have ${cluster.gpusPerNode}.`
+    return analysis
+  }
+  if (!derivedParallelism) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `World size ${totalGPUs} must be divisible by TP × PP × CP × EP, and the FSDP shard group must divide the cluster cleanly.`
+    return analysis
+  }
+  if (model.hiddenDim % model.numHeads !== 0) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `hiddenDim ${model.hiddenDim} must divide evenly across ${model.numHeads} attention heads.`
+    return analysis
+  }
+  if (model.numHeads % parallelism.tp !== 0) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `TP ${parallelism.tp} must divide the ${model.numHeads} attention heads.`
+    return analysis
+  }
+  if (model.numKVHeads % parallelism.tp !== 0) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `TP ${parallelism.tp} should divide the ${model.numKVHeads} KV heads for clean GQA sharding.`
+    return analysis
+  }
+  if (training.seqLength % parallelism.cp !== 0) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `CP ${parallelism.cp} must divide the sequence length ${training.seqLength}.`
+    return analysis
+  }
+  if (model.architecture === 'moe' && !model.moe) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason = 'MoE models require expert metadata.'
+    return analysis
+  }
+  if (model.architecture === 'moe' && model.moe && model.moe.numExperts % parallelism.ep !== 0) {
+    const analysis = emptyAnalysis()
+    analysis.infeasibilityReason =
+      `EP ${parallelism.ep} must divide the ${model.moe.numExperts} experts.`
+    return analysis
+  }
+  const { modelBreakdown, layerDistribution, stageMemory, stageParameters } = getStageMemoryMap(
+    model,
+    training,
+    parallelism,
+    derivedParallelism,
+  )
+  const placement = buildPlacement(cluster, parallelism, derivedParallelism, totalGPUs)
+  const maxStageLayers = Math.max(...layerDistribution.map((stage) => stage.numLayers), 0)
+  const pipelineStages = layerDistribution.map((stage) => {
+    const stageMemoryBreakdown = stageMemory.get(stage.stageIndex)
+    const stageParameterCount = stageParameters.get(stage.stageIndex)
+    return {
+      stageIndex: stage.stageIndex,
+      layerRange: [stage.startLayer, stage.endLayer] as [number, number],
+      numLayers: stage.numLayers,
+      memoryGB: round2(
+        (stageMemoryBreakdown?.totalGB ?? 0) *
+          parallelism.tp *
+          parallelism.cp *
+          parallelism.ep *
+          derivedParallelism.dp,
+      ),
+      hasEmbedding: stageParameterCount?.hasEmbedding ?? false,
+      hasOutputHead: stageParameterCount?.hasOutputHead ?? false,
+    }
+  })
+  const worstStageIndex = pipelineStages.reduce((worstIndex, stage) => {
+    const worstStageMemory = stageMemory.get(worstIndex)?.totalGB ?? 0
+    const candidateStageMemory = stageMemory.get(stage.stageIndex)?.totalGB ?? 0
+    return candidateStageMemory > worstStageMemory ? stage.stageIndex : worstIndex
+  }, 0)
+  const worstStageMemory = stageMemory.get(worstStageIndex) ?? {
+    parametersGB: 0,
+    optimizerStatesGB: 0,
+    gradientsGB: 0,
+    activationsGB: 0,
+    totalGB: 0,
+  }
+  const pipelineBubbleFraction =
+    parallelism.pp <= 1
+      ? 0
+      : (parallelism.pp - 1) / (training.gradAccumSteps + parallelism.pp - 1)
+  const boundaryStageCount = Math.min(
+    parallelism.pp,
+    Math.max(0, Math.round(pipelineBubbleFraction * parallelism.pp)),
+  )
+  const gpuMap = placement.map((entry) => {
+    const stageMemoryBreakdown =
+      entry.ppStage >= 0
+        ? stageMemory.get(entry.ppStage) ?? {
+            parametersGB: 0,
+            optimizerStatesGB: 0,
+            gradientsGB: 0,
+            activationsGB: 0,
+            totalGB: 0,
+          }
+        : {
+            parametersGB: 0,
+            optimizerStatesGB: 0,
+            gradientsGB: 0,
+            activationsGB: 0,
+            totalGB: 0,
+          }
+    const bubbleIdle = entry.ppStage >= parallelism.pp - boundaryStageCount && entry.ppStage >= 0
+    return {
+      globalGPUIndex: entry.globalGPUIndex,
+      nodeIndex: entry.nodeIndex,
+      localGPUIndex: entry.localGPUIndex,
+      tpGroup: entry.tpGroup,
+      tpLane: entry.tpLane,
+      ppStage: entry.ppStage,
+      cpShard: entry.cpShard,
+      epLane: entry.epLane,
+      dpReplica: entry.dpReplica,
+      replicaGroup: entry.replicaGroup,
+      fsdpRank: entry.fsdpRank,
+      memoryUsedGB: round2(entry.isActive ? stageMemoryBreakdown.totalGB : 0),
+      memoryCapacityGB: cluster.gpuType.hbmCapacityGB,
+      isActive: entry.isActive && !bubbleIdle,
+    }
+  })
+  const activationBytes = getActivationBytes(training.precision)
+  const shardedSequenceLength = training.seqLength / parallelism.cp
+  const tokensPerMicroBatchShard = training.microBatchSize * shardedSequenceLength
+  const collectiveMessageBytes =
+    tokensPerMicroBatchShard * model.hiddenDim * activationBytes
+  const attentionComputeMultiplier = 0.65 + 0.35 * getAttentionMultiplier(model, training.seqLength)
+  const activationCheckpointComputeMultiplier = training.activationCheckpointing ? 1.2 : 1
+  const totalFlopsPerStep =
+    6 *
+    modelBreakdown.activeParamsPerToken *
+    training.microBatchSize *
+    training.seqLength *
+    training.gradAccumSteps *
+    derivedParallelism.dp *
+    attentionComputeMultiplier *
+    activationCheckpointComputeMultiplier
+  const launchedGPUs = Math.max(totalGPUs, 1)
+  const flopsPerGpuPerStep = totalFlopsPerStep / launchedGPUs
+  const peakTFLOPs = getPeakTFLOPsForPrecision(cluster.gpuType, training.precision)
+  const sustainedTFLOPs = peakTFLOPs * getSustainedComputeEfficiency(training)
+  const computeTimePerStepMs = (flopsPerGpuPerStep / (sustainedTFLOPs * 1e12)) * 1000
+  const pipelineBubbleTimeMs =
+    pipelineBubbleFraction >= 1
+      ? 0
+      : (computeTimePerStepMs * pipelineBubbleFraction) / (1 - pipelineBubbleFraction)
+  const tentativeTotalStepTimeMs = computeTimePerStepMs + pipelineBubbleTimeMs
+  const tpMembers = placement.filter(
+    (entry) =>
+      entry.dpReplica === 0 &&
+      entry.ppStage === 0 &&
+      entry.cpShard === 0 &&
+      entry.epLane === 0 &&
+      entry.tpLane >= 0,
+  )
+  const tpStats = getRingCommStats({
+    groupCount: parallelism.pp * parallelism.cp * parallelism.ep * derivedParallelism.dp,
+    groupWidth: parallelism.tp,
+    messageBytes: collectiveMessageBytes,
+    collectiveCount: TP_ALL_REDUCES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
+    membersForBandwidth: tpMembers,
+    cluster,
+    totalStepTimeMs: tentativeTotalStepTimeMs,
+  })
+  const cpMembers = placement.filter(
+    (entry) =>
+      entry.dpReplica === 0 &&
+      entry.ppStage === 0 &&
+      entry.epLane === 0 &&
+      entry.tpLane === 0 &&
+      entry.cpShard >= 0,
+  )
+  const cpStats = getRingCommStats({
+    groupCount: parallelism.pp * derivedParallelism.dp * parallelism.tp * parallelism.ep,
+    groupWidth: parallelism.cp,
+    messageBytes: collectiveMessageBytes,
+    collectiveCount: CP_COLLECTIVES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
+    membersForBandwidth: cpMembers,
+    cluster,
+    totalStepTimeMs: tentativeTotalStepTimeMs,
+  })
+  const averageSharedLayerParams =
+    model.numLayers > 0
+      ? (modelBreakdown.denseLayerCount * modelBreakdown.sharedDenseLayerParams +
+          modelBreakdown.moeLayerCount * modelBreakdown.sharedMoeLayerParams) /
+        model.numLayers
+      : 0
+  const fsdpMessageBytes =
+    parallelism.zeroStage >= 3 && derivedParallelism.fsdpDataParallelDegree > 1
+      ? (averageSharedLayerParams / parallelism.tp / derivedParallelism.fsdpDataParallelDegree) *
+        getParameterBytes(training.precision)
+      : 0
+  const fsdpMembers = placement.filter(
+    (entry) =>
+      entry.replicaGroup === 0 &&
+      entry.ppStage === 0 &&
+      entry.cpShard === 0 &&
+      entry.epLane === 0 &&
+      entry.tpLane === 0,
+  )
+  const fsdpStats = getRingCommStats({
+    groupCount:
+      derivedParallelism.replicaGroups *
+      parallelism.pp *
+      parallelism.cp *
+      parallelism.ep *
+      parallelism.tp,
+    groupWidth: derivedParallelism.fsdpDataParallelDegree,
+    messageBytes: fsdpMessageBytes,
+    collectiveCount: FSDP_COLLECTIVES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
+    membersForBandwidth: fsdpMembers,
+    cluster,
+    totalStepTimeMs: tentativeTotalStepTimeMs,
+  })
+  const epMembers = placement.filter(
+    (entry) =>
+      entry.dpReplica === 0 &&
+      entry.ppStage === 0 &&
+      entry.cpShard === 0 &&
+      entry.tpLane === 0 &&
+      entry.epLane >= 0,
+  )
+  const moeLayerCount = modelBreakdown.moeLayerCount
+  const epMessageBytes =
+    model.architecture === 'moe' && model.moe
+      ? tokensPerMicroBatchShard *
+        model.hiddenDim *
+        activationBytes *
+        model.moe.expertsPerToken
+      : 0
+  const epTransferCount = EP_ALL_TO_ALLS_PER_LAYER * moeLayerCount * training.gradAccumSteps
+  const epStats = (() => {
+    if (parallelism.ep <= 1 || epTransferCount <= 0 || epMessageBytes <= 0) {
+      return {
+        totalVolumeBytes: 0,
+        timePerStepMs: 0,
+        linkUtilizationPercent: 0,
+        usesInterNode: false,
+      }
+    }
+    const { bandwidthGBs, usesInterNode } = getMaxBandwidthForCollective(epMembers, cluster)
+    const volumeBytesPerGpu = epMessageBytes * epTransferCount * 2
+    const totalVolumeBytes =
+      volumeBytesPerGpu *
+      parallelism.ep *
+      parallelism.pp *
+      parallelism.cp *
+      parallelism.tp *
+      derivedParallelism.dp
+    const timePerStepMs = (bytesToGB(volumeBytesPerGpu) / bandwidthGBs) * 1000
+    const linkUtilizationPercent =
+      tentativeTotalStepTimeMs > 0
+        ? clamp(
+            (bytesToGB(volumeBytesPerGpu) /
+              (bandwidthGBs * (tentativeTotalStepTimeMs / 1000))) *
+              100,
+            0,
+            100,
+          )
+        : 0
+    return {
+      totalVolumeBytes,
+      timePerStepMs,
+      linkUtilizationPercent,
+      usesInterNode,
+    }
+  })()
+  let ppTotalVolumeBytes = 0
+  let ppTimePerStepMs = 0
+  let ppUsesInterNode = false
+  for (let dpReplica = 0; dpReplica < derivedParallelism.dp; dpReplica += 1) {
+    for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
+      for (let stageIndex = 0; stageIndex < parallelism.pp - 1; stageIndex += 1) {
+        const source = getPlacementEntry(placement, {
+          dpReplica,
+          ppStage: stageIndex,
+          cpShard,
+          epLane: 0,
+          tpLane: 0,
+        })
+        const target = getPlacementEntry(placement, {
+          dpReplica,
+          ppStage: stageIndex + 1,
+          cpShard,
+          epLane: 0,
+          tpLane: 0,
+        })
+        if (!source || !target) {
+          continue
+        }
+        const usesInterNode = source.nodeIndex !== target.nodeIndex
+        const bandwidthGBs = usesInterNode
+          ? cluster.interNodeBandwidthGBs
+          : cluster.intraNodeBandwidthGBs
+        const perLaneBytes = collectiveMessageBytes / parallelism.tp
+        ppUsesInterNode ||= usesInterNode
+        ppTotalVolumeBytes += collectiveMessageBytes * 2 * training.gradAccumSteps
+        ppTimePerStepMs +=
+          (bytesToGB(perLaneBytes) / bandwidthGBs) * 1000 * 2 * training.gradAccumSteps
+      }
+    }
+  }
+  const maxStageGradientBytes = Math.max(
+    ...Array.from(stageMemory.values()).map((stage) => stage.gradientsGB * BYTES_PER_GB),
+    0,
+  )
+  const dpGroupWidth =
+    parallelism.fsdpShardGroupSize > 1
+      ? derivedParallelism.replicaGroups
+      : derivedParallelism.dp
+  const dpMembers = parallelism.fsdpShardGroupSize > 1
+    ? placement.filter(
+        (entry) =>
+          entry.fsdpRank === 0 &&
+          entry.ppStage === 0 &&
+          entry.cpShard === 0 &&
+          entry.epLane === 0 &&
+          entry.tpLane === 0,
+      )
+    : placement.filter(
+        (entry) =>
+          entry.ppStage === 0 &&
+          entry.cpShard === 0 &&
+          entry.epLane === 0 &&
+          entry.tpLane === 0,
+      )
+  const gradientCommBytesPerGpu =
+    dpGroupWidth > 1
+      ? (2 * (dpGroupWidth - 1) * maxStageGradientBytes) / dpGroupWidth
+      : 0
+  const dpBandwidth = getMaxBandwidthForCollective(dpMembers, cluster)
+  const dpTimeMs =
+    dpGroupWidth > 1
+      ? (bytesToGB(gradientCommBytesPerGpu) / dpBandwidth.bandwidthGBs) * 1000
+      : 0
+  const canOverlapDp = dpGroupWidth > 1 && (parallelism.pp > 1 || training.gradAccumSteps > 1)
+  const dpNonOverlappedTimeMs = dpTimeMs * (canOverlapDp ? 0.35 : 1)
+  const communicationTimePerStepMs =
+    tpStats.timePerStepMs +
+    cpStats.timePerStepMs +
+    fsdpStats.timePerStepMs +
+    epStats.timePerStepMs +
+    ppTimePerStepMs +
+    dpNonOverlappedTimeMs
+  const totalStepTimeMs =
+    computeTimePerStepMs + pipelineBubbleTimeMs + communicationTimePerStepMs
+  const tokensPerSecond =
+    totalStepTimeMs > 0 ? globalBatchSizeTokens / (totalStepTimeMs / 1000) : 0
+  const mfu =
+    tokensPerSecond > 0
+      ? clamp(
+          (6 * modelBreakdown.activeParamsPerToken * attentionComputeMultiplier * tokensPerSecond) /
+            (launchedGPUs * peakTFLOPs * 1e12),
+          0,
+          1,
+        )
+      : 0
+  const dpLinkUtilizationPercent =
+    dpGroupWidth > 1 && totalStepTimeMs > 0
+      ? clamp(
+          (bytesToGB(gradientCommBytesPerGpu) /
+            (dpBandwidth.bandwidthGBs * (totalStepTimeMs / 1000))) *
+            100,
+          0,
+          100,
+        )
+      : 0
+  const ppPerLaneVolumeGB =
+    parallelism.pp > 1
+      ? bytesToGB(collectiveMessageBytes / parallelism.tp) * 2 * training.gradAccumSteps
+      : 0
+  const ppLinkUtilizationPercent =
+    parallelism.pp > 1 && totalStepTimeMs > 0
+      ? clamp(
+          (ppPerLaneVolumeGB /
+            ((ppUsesInterNode
+              ? cluster.interNodeBandwidthGBs
+              : cluster.intraNodeBandwidthGBs) *
+              (totalStepTimeMs / 1000))) *
+            100,
+          0,
+          100,
+        )
+      : 0
+  const links: ClusterAnalysis['links'] = []
+  const visualReplicaSamples = Math.min(derivedParallelism.dp, 12)
+  const sampledDpReplicas = Array.from({ length: visualReplicaSamples }, (_, sampleIndex) =>
+    Math.floor((sampleIndex * derivedParallelism.dp) / visualReplicaSamples),
+  )
+  for (const dpReplica of sampledDpReplicas) {
+    for (let ppStage = 0; ppStage < parallelism.pp; ppStage += 1) {
+      for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
+        for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
+          const tpEntries = placement
+            .filter(
+              (entry) =>
+                entry.dpReplica === dpReplica &&
+                entry.ppStage === ppStage &&
+                entry.cpShard === cpShard &&
+                entry.epLane === epLane,
+            )
+            .sort((left, right) => left.tpLane - right.tpLane)
+          if (parallelism.tp > 1) {
+            for (let lane = 0; lane < tpEntries.length; lane += 1) {
+              const from = tpEntries[lane]
+              const to = tpEntries[(lane + 1) % tpEntries.length]
+              links.push({
+                fromGPU: from.globalGPUIndex,
+                toGPU: to.globalGPUIndex,
+                type: 'nvlink',
+                trafficType: 'tp',
+                volumeGB: round2(bytesToGB(tpStats.volumeBytesPerGpu)),
+                utilizationPercent: round2(tpStats.linkUtilizationPercent),
+              })
+            }
+          }
+          if (ppStage < parallelism.pp - 1) {
+            const nextTpEntries = placement
+              .filter(
+                (entry) =>
+                  entry.dpReplica === dpReplica &&
+                  entry.ppStage === ppStage + 1 &&
+                  entry.cpShard === cpShard &&
+                  entry.epLane === epLane,
+              )
+              .sort((left, right) => left.tpLane - right.tpLane)
+            for (let lane = 0; lane < Math.min(tpEntries.length, nextTpEntries.length); lane += 1) {
+              const from = tpEntries[lane]
+              const to = nextTpEntries[lane]
+              links.push({
+                fromGPU: from.globalGPUIndex,
+                toGPU: to.globalGPUIndex,
+                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
+                trafficType: 'pp',
+                volumeGB: round2(ppPerLaneVolumeGB),
+                utilizationPercent: round2(ppLinkUtilizationPercent),
+              })
+            }
+          }
+        }
+      }
+      if (parallelism.cp > 1) {
+        for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
+        for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
+          const cpEntries = placement
+            .filter(
+              (entry) =>
+                entry.dpReplica === dpReplica &&
+                entry.ppStage === ppStage &&
+                entry.epLane === epLane &&
+                entry.tpLane === tpLane,
+            )
+            .sort((left, right) => left.cpShard - right.cpShard)
+          for (let shardIndex = 0; shardIndex < cpEntries.length; shardIndex += 1) {
+            const from = cpEntries[shardIndex]
+            const to = cpEntries[(shardIndex + 1) % cpEntries.length]
+            links.push({
+              fromGPU: from.globalGPUIndex,
+              toGPU: to.globalGPUIndex,
+              type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
+              trafficType: 'cp',
+              volumeGB: round2(bytesToGB(cpStats.volumeBytesPerGpu)),
+              utilizationPercent: round2(cpStats.linkUtilizationPercent),
+            })
+          }
+        }
+        }
+      }
+      if (parallelism.ep > 1) {
+        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
+          for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
+            const epEntries = placement
+              .filter(
+                (entry) =>
+                  entry.dpReplica === dpReplica &&
+                  entry.ppStage === ppStage &&
+                  entry.cpShard === cpShard &&
+                  entry.tpLane === tpLane,
+              )
+              .sort((left, right) => left.epLane - right.epLane)
+            for (let lane = 0; lane < epEntries.length; lane += 1) {
+              const from = epEntries[lane]
+              const to = epEntries[(lane + 1) % epEntries.length]
+              links.push({
+                fromGPU: from.globalGPUIndex,
+                toGPU: to.globalGPUIndex,
+                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
+                trafficType: 'ep',
+                volumeGB: round2(
+                  epStats.totalVolumeBytes > 0
+                    ? bytesToGB(epStats.totalVolumeBytes) /
+                      (parallelism.ep *
+                        Math.max(parallelism.tp * parallelism.cp * parallelism.pp * derivedParallelism.dp, 1))
+                    : 0,
+                ),
+                utilizationPercent: round2(epStats.linkUtilizationPercent),
+              })
+            }
+          }
+        }
+      }
+      if (derivedParallelism.fsdpDataParallelDegree > 1) {
+        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
+          for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
+            for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
+              const fsdpEntries = placement
+                .filter(
+                  (entry) =>
+                    entry.replicaGroup === placement.find((item) => item.dpReplica === dpReplica)?.replicaGroup &&
+                    entry.ppStage === ppStage &&
+                    entry.cpShard === cpShard &&
+                    entry.epLane === epLane &&
+                    entry.tpLane === tpLane,
+                )
+                .sort((left, right) => left.fsdpRank - right.fsdpRank)
+              for (let rank = 0; rank < fsdpEntries.length; rank += 1) {
+                const from = fsdpEntries[rank]
+                const to = fsdpEntries[(rank + 1) % fsdpEntries.length]
+                links.push({
+                  fromGPU: from.globalGPUIndex,
+                  toGPU: to.globalGPUIndex,
+                  type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
+                  trafficType: 'fsdp',
+                  volumeGB: round2(bytesToGB(fsdpStats.volumeBytesPerGpu)),
+                  utilizationPercent: round2(fsdpStats.linkUtilizationPercent),
+                })
+              }
+            }
+          }
+        }
+      }
+      if (dpGroupWidth > 1) {
+        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
+          for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
+            for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
+              const current = placement.find((entry) => entry.dpReplica === dpReplica)
+              if (!current) {
+                continue
+              }
+              const from = getPlacementEntry(placement, {
+                replicaGroup:
+                  parallelism.fsdpShardGroupSize > 1 ? current.replicaGroup : undefined,
+                fsdpRank: parallelism.fsdpShardGroupSize > 1 ? current.fsdpRank : undefined,
+                dpReplica: parallelism.fsdpShardGroupSize > 1 ? undefined : dpReplica,
+                ppStage,
+                cpShard,
+                epLane,
+                tpLane,
+              })
+              const to = getPlacementEntry(placement, {
+                replicaGroup:
+                  parallelism.fsdpShardGroupSize > 1
+                    ? (current.replicaGroup + 1) % derivedParallelism.replicaGroups
+                    : undefined,
+                fsdpRank: parallelism.fsdpShardGroupSize > 1 ? current.fsdpRank : undefined,
+                dpReplica:
+                  parallelism.fsdpShardGroupSize > 1
+                    ? undefined
+                    : (dpReplica + 1) % derivedParallelism.dp,
+                ppStage,
+                cpShard,
+                epLane,
+                tpLane,
+              })
+              if (!from || !to) {
+                continue
+              }
+              links.push({
+                fromGPU: from.globalGPUIndex,
+                toGPU: to.globalGPUIndex,
+                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
+                trafficType: 'dp',
+                volumeGB: round2(bytesToGB(gradientCommBytesPerGpu)),
+                utilizationPercent: round2(dpLinkUtilizationPercent),
+              })
+            }
+          }
+        }
+      }
+    }
+  }
+  const feasible = worstStageMemory.totalGB <= cluster.gpuType.hbmCapacityGB
+  const infeasibilityReason = feasible
+    ? undefined
+    : `Stage ${worstStageIndex} uses ${round2(worstStageMemory.totalGB)} GB per GPU, exceeding ${cluster.gpuType.hbmCapacityGB} GB of HBM.`
+  return {
+    feasible,
+    infeasibilityReason,
+    totalParams: Math.round(modelBreakdown.totalParams),
+    activeParamsPerToken: Math.round(modelBreakdown.activeParamsPerToken),
+    globalBatchSizeTokens,
+    totalGPUs,
+    derivedParallelism: {
+      dp: derivedParallelism.dp,
+      replicaGroups: derivedParallelism.replicaGroups,
+      fsdpShardGroupSize: parallelism.fsdpShardGroupSize,
+      fsdpGroupSize: derivedParallelism.fsdpGroupSize,
+      ep: parallelism.ep,
+    },
+    memoryBreakdown: {
+      parametersGB: round2(worstStageMemory.parametersGB),
+      optimizerStatesGB: round2(worstStageMemory.optimizerStatesGB),
+      gradientsGB: round2(worstStageMemory.gradientsGB),
+      activationsGB: round2(worstStageMemory.activationsGB),
+      totalGB: round2(worstStageMemory.totalGB),
+      hbmCapacityGB: cluster.gpuType.hbmCapacityGB,
+      utilizationPercent: round2(
+        (worstStageMemory.totalGB / cluster.gpuType.hbmCapacityGB) * 100,
+      ),
+    },
+    pipelineStages,
+    communication: {
+      tp: {
+        allReducesPerLayer: TP_ALL_REDUCES_PER_LAYER,
+        messageSizeBytes: collectiveMessageBytes,
+        totalVolumePerStepGB: round2(bytesToGB(tpStats.totalVolumeBytes)),
+        timePerStepMs: round2(tpStats.timePerStepMs),
+        linkUtilizationPercent: round2(tpStats.linkUtilizationPercent),
+      },
+      pp: {
+        activationMessageSizeBytes: collectiveMessageBytes,
+        numP2PTransfersPerStep:
+          parallelism.pp > 1
+            ? 2 *
+              (parallelism.pp - 1) *
+              training.gradAccumSteps *
+              parallelism.cp *
+              parallelism.tp *
+              derivedParallelism.dp
+            : 0,
+        totalVolumePerStepGB: round2(bytesToGB(ppTotalVolumeBytes)),
+        timePerStepMs: round2(ppTimePerStepMs),
+        usesInterNode: ppUsesInterNode,
+      },
+      cp: {
+        collectivesPerLayer: CP_COLLECTIVES_PER_LAYER,
+        messageSizeBytes: collectiveMessageBytes,
+        totalVolumePerStepGB: round2(bytesToGB(cpStats.totalVolumeBytes)),
+        timePerStepMs: round2(cpStats.timePerStepMs),
+        linkUtilizationPercent: round2(cpStats.linkUtilizationPercent),
+        usesInterNode: cpStats.usesInterNode,
+      },
+      fsdp: {
+        collectivesPerLayer: FSDP_COLLECTIVES_PER_LAYER,
+        messageSizeBytes: round2(fsdpMessageBytes),
+        totalVolumePerStepGB: round2(bytesToGB(fsdpStats.totalVolumeBytes)),
+        timePerStepMs: round2(fsdpStats.timePerStepMs),
+        linkUtilizationPercent: round2(fsdpStats.linkUtilizationPercent),
+        usesInterNode: fsdpStats.usesInterNode,
+      },
+      ep: {
+        allToAllsPerLayer: EP_ALL_TO_ALLS_PER_LAYER,
+        messageSizeBytes: round2(epMessageBytes),
+        totalVolumePerStepGB: round2(bytesToGB(epStats.totalVolumeBytes)),
+        timePerStepMs: round2(epStats.timePerStepMs),
+        linkUtilizationPercent: round2(epStats.linkUtilizationPercent),
+        usesInterNode: epStats.usesInterNode,
+      },
+      dp: {
+        gradientVolumePerGPU_GB: round2(bytesToGB(gradientCommBytesPerGpu)),
+        allReduceTimeMs: round2(dpTimeMs),
+        canOverlapWithBackward: canOverlapDp,
+        linkUtilizationPercent: round2(dpLinkUtilizationPercent),
+      },
+    },
+    throughput: {
+      computeTimePerStepMs: round2(computeTimePerStepMs),
+      communicationTimePerStepMs: round2(communicationTimePerStepMs),
+      pipelineBubbleFraction: round2(pipelineBubbleFraction),
+      pipelineBubbleTimeMs: round2(pipelineBubbleTimeMs),
+      totalStepTimeMs: round2(totalStepTimeMs),
+      tokensPerSecond: round2(tokensPerSecond),
+      mfu: round2(mfu),
+    },
+    gpuMap,
+    links,
+  }
+}
+/**
+ * Dense model preset: 32 layers, hidden size 4096, 32 attention heads with
+ * 32 KV heads, full attention, untied embeddings.
+ * A fresh object is built on every call.
+ */
+export const llama7B = (): ModelConfig => {
+  const preset: ModelConfig = {
+    architecture: 'dense',
+    hiddenDim: 4096,
+    numLayers: 32,
+    numHeads: 32,
+    numKVHeads: 32,
+    vocabSize: 32000,
+    intermediateSize: 11008,
+    tiedEmbeddings: false,
+    attentionProfile: { type: 'full' },
+  }
+  return preset
+}
+/**
+ * Dense model preset: 80 layers, hidden size 8192, 64 attention heads sharing
+ * 8 KV heads, full attention, untied embeddings.
+ * A fresh object is built on every call.
+ */
+export const llama70B = (): ModelConfig => {
+  const preset: ModelConfig = {
+    architecture: 'dense',
+    hiddenDim: 8192,
+    numLayers: 80,
+    numHeads: 64,
+    numKVHeads: 8,
+    vocabSize: 32000,
+    intermediateSize: 28672,
+    tiedEmbeddings: false,
+    attentionProfile: { type: 'full' },
+  }
+  return preset
+}
+/**
+ * Dense model preset: 126 layers, hidden size 16384, 128 attention heads
+ * sharing 8 KV heads, full attention, untied embeddings.
+ * A fresh object is built on every call.
+ */
+export const llama405B = (): ModelConfig => {
+  const preset: ModelConfig = {
+    architecture: 'dense',
+    hiddenDim: 16384,
+    numLayers: 126,
+    numHeads: 128,
+    numKVHeads: 8,
+    vocabSize: 128256,
+    intermediateSize: 53248,
+    tiedEmbeddings: false,
+    attentionProfile: { type: 'full' },
+  }
+  return preset
+}
+/**
+ * Dense model preset with a hybrid attention profile: 64 layers, hidden size
+ * 5120, 40 attention heads sharing 8 KV heads, a 4096-token sliding window
+ * and a global-attention fraction of 0.25.
+ * A fresh object is built on every call.
+ */
+export const olmo3_32B = (): ModelConfig => {
+  const preset: ModelConfig = {
+    architecture: 'dense',
+    hiddenDim: 5120,
+    numLayers: 64,
+    numHeads: 40,
+    numKVHeads: 8,
+    vocabSize: 100278,
+    intermediateSize: 27648,
+    tiedEmbeddings: false,
+    attentionProfile: {
+      type: 'hybrid',
+      slidingWindowSize: 4096,
+      globalAttentionFraction: 0.25,
+    },
+  }
+  return preset
+}
+/**
+ * Dense model preset: 126 layers, hidden size 16384, 128 attention heads
+ * sharing 8 KV heads, vocabulary of 128256, full attention.
+ * A fresh object is built on every call.
+ */
+export const llama31_405B = (): ModelConfig => {
+  const preset: ModelConfig = {
+    architecture: 'dense',
+    hiddenDim: 16384,
+    numLayers: 126,
+    numHeads: 128,
+    numKVHeads: 8,
+    vocabSize: 128256,
+    intermediateSize: 53248,
+    tiedEmbeddings: false,
+    attentionProfile: { type: 'full' },
+  }
+  return preset
+}
+/**
+ * Mixture-of-experts model preset: 60 layers (6 of them dense), hidden size
+ * 3072, 256 experts with 4 routed per token, hybrid attention with a
+ * 4096-token sliding window and global attention every 4th layer.
+ * A fresh object (including nested objects) is built on every call.
+ */
+export const trinityLarge400B = (): ModelConfig => {
+  const attentionProfile: ModelConfig['attentionProfile'] = {
+    type: 'hybrid',
+    slidingWindowSize: 4096,
+    globalAttentionEveryN: 4,
+  }
+  const moe: ModelConfig['moe'] = {
+    numExperts: 256,
+    expertsPerToken: 4,
+    numDenseLayers: 6,
+    expertIntermediateSize: 3072,
+    activeParamsPerToken: 13_000_000_000,
+  }
+  return {
+    architecture: 'moe',
+    hiddenDim: 3072,
+    numLayers: 60,
+    numHeads: 48,
+    numKVHeads: 8,
+    vocabSize: 200192,
+    intermediateSize: 12288,
+    tiedEmbeddings: false,
+    attentionProfile,
+    moe,
+  }
+}
+/** GPU spec preset named 'A100 80GB': 80 GB HBM, 312 peak BF16 TFLOPs, 2 TB/s memory bandwidth. */
+export const a100_80gb = (): GPUSpec => {
+  const spec: GPUSpec = {
+    name: 'A100 80GB',
+    hbmCapacityGB: 80,
+    peakTFLOPsBF16: 312,
+    memBandwidthTBs: 2,
+  }
+  return spec
+}
+/** GPU spec preset named 'H100 SXM': 80 GB HBM, 989 peak BF16 TFLOPs, 3.35 TB/s memory bandwidth. */
+export const h100_sxm = (): GPUSpec => {
+  const spec: GPUSpec = {
+    name: 'H100 SXM',
+    hbmCapacityGB: 80,
+    peakTFLOPsBF16: 989,
+    memBandwidthTBs: 3.35,
+  }
+  return spec
+}
+/**
+ * GPU spec preset named 'B300': 192 GB HBM, 2250 peak BF16 TFLOPs, 8 TB/s memory bandwidth.
+ * NOTE(review): apart from the name these numbers are identical to the GB200
+ * preset — confirm the 192 GB capacity is the intended figure for B300.
+ */
+export const b300 = (): GPUSpec => {
+  const spec: GPUSpec = {
+    name: 'B300',
+    hbmCapacityGB: 192,
+    peakTFLOPsBF16: 2250,
+    memBandwidthTBs: 8,
+  }
+  return spec
+}
+/** GPU spec preset named 'GB200': 192 GB HBM, 2250 peak BF16 TFLOPs, 8 TB/s memory bandwidth. */
+export const gb200 = (): GPUSpec => {
+  const spec: GPUSpec = {
+    name: 'GB200',
+    hbmCapacityGB: 192,
+    peakTFLOPsBF16: 2250,
+    memBandwidthTBs: 8,
+  }
+  return spec
+}
+/**
+ * Cluster preset with a single 8-GPU node.
+ * Bandwidths come from getDefaultFabric for the chosen GPU type.
+ * @param gpuType GPU spec to populate the node with; defaults to a100_80gb().
+ */
+export const singleNode8GPU = (gpuType: GPUSpec = a100_80gb()): ClusterConfig => {
+  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)
+  return {
+    gpuType,
+    gpusPerNode: 8,
+    numNodes: 1,
+    intraNodeBandwidthGBs,
+    interNodeBandwidthGBs,
+    nodesPerRack: 1,
+    rackLabel: 'node',
+    nodeLabel: 'GPU host',
+    podLabel: 'node',
+  }
+}
+/**
+ * Cluster preset with 8 nodes of 8 GPUs (64 GPUs), 4 nodes per rack.
+ * Bandwidths come from getDefaultFabric for the chosen GPU type.
+ * @param gpuType GPU spec to populate the nodes with; defaults to h100_sxm().
+ */
+export const cluster64GPU = (gpuType: GPUSpec = h100_sxm()): ClusterConfig => {
+  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)
+  return {
+    gpuType,
+    gpusPerNode: 8,
+    numNodes: 8,
+    intraNodeBandwidthGBs,
+    interNodeBandwidthGBs,
+    nodesPerRack: 4,
+    rackLabel: 'rack',
+    nodeLabel: 'GPU host',
+    podLabel: 'rack',
+  }
+}
+/**
+ * Cluster preset with 72 nodes of 8 GB200 GPUs (576 GPUs), 9 nodes per rack.
+ * Bandwidths come from getDefaultFabric for the GB200 spec.
+ */
+export const frontier576GPU = (): ClusterConfig => {
+  const gpuType = gb200()
+  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)
+  return {
+    gpuType,
+    gpusPerNode: 8,
+    numNodes: 72,
+    intraNodeBandwidthGBs,
+    interNodeBandwidthGBs,
+    nodesPerRack: 9,
+    rackLabel: 'NVL72 rack',
+    nodeLabel: 'compute tray',
+    podLabel: 'rack',
+  }
+}

src/lib/viewOptions.ts ADDED Viewed

	@@ -0,0 +1,50 @@

+import {
+  getScenarioWorkbenchConfig,
+  type WorkbenchConfig,
+  type WorkbenchScenarioId,
+} from './workbench'
+/** View flags parsed from the page URL query string by getViewOptions. */
+export type ViewOptions = {
+  // True when the `debug` query parameter holds a truthy value.
+  debug: boolean
+  // True when the `snapshot` query parameter holds a truthy value.
+  snapshot: boolean
+  // Scenario id from the `scenario` query parameter; 'default' when missing or unknown.
+  scenario: WorkbenchScenarioId
+}
+// Allow-list of scenario ids accepted from the `scenario` query parameter.
+// Any other value is replaced by 'default' in getViewOptions.
+const SCENARIOS = new Set<WorkbenchScenarioId>([
+  'default',
+  'olmo-pretraining',
+  'olmo-long-context',
+  'llama-pretraining',
+  'llama-long-context',
+  'trinity-pretraining',
+  'trinity-long-context',
+  'infeasible-memory',
+])
+// Query-string values that switch a boolean flag on (matched case-insensitively).
+const truthyValues = new Set(['1', 'true', 'yes', 'on'])
+/**
+ * Reports whether a query-parameter value means "enabled".
+ * @param value Raw parameter value, or null when the parameter is absent.
+ * @returns true only for a non-null value whose lowercase form is in truthyValues.
+ */
+function isTruthy(value: string | null) {
+  return value !== null && truthyValues.has(value.toLowerCase())
+}
+/**
+ * Parses view options out of a URL query string.
+ *
+ * @param search Query string to parse (for example `?debug=1&scenario=olmo-pretraining`).
+ *   Defaults to the current page's `window.location.search`; when no `window`
+ *   global exists (Node, unit tests) it defaults to an empty string instead of
+ *   throwing a ReferenceError.
+ * @returns The `debug` and `snapshot` flags plus the scenario id. A missing or
+ *   unrecognised `scenario` value yields 'default'.
+ */
+export function getViewOptions(
+  search = typeof window === 'undefined' ? '' : window.location.search,
+): ViewOptions {
+  const params = new URLSearchParams(search)
+  const scenarioParam = params.get('scenario')
+  // Only ids on the allow-list are trusted; anything else falls back to 'default'.
+  const scenario = SCENARIOS.has(scenarioParam as WorkbenchScenarioId)
+    ? (scenarioParam as WorkbenchScenarioId)
+    : 'default'
+  return {
+    debug: isTruthy(params.get('debug')),
+    snapshot: isTruthy(params.get('snapshot')),
+    scenario,
+  }
+}
+/**
+ * Resolves a scenario id to its workbench configuration by delegating to
+ * getScenarioWorkbenchConfig.
+ */
+export function getScenarioConfig(scenario: WorkbenchScenarioId): WorkbenchConfig {
+  const config = getScenarioWorkbenchConfig(scenario)
+  return config
+}

src/lib/workbench.ts ADDED Viewed

	@@ -0,0 +1,395 @@

+import {
+  a100_80gb,
+  b300,
+  gb200,
+  h100_sxm,
+  llama31_405B,
+  olmo3_32B,
+  trinityLarge400B,
+  type ClusterConfig,
+  type GPUSpec,
+  type ModelConfig,
+  type ParallelismConfig,
+  type TrainingConfig,
+} from './trainingClusterModel'
+// Ids of the built-in example models (keys of EXAMPLE_PRESETS).
+export type ExamplePresetId = 'olmo3-32b' | 'llama31-405b' | 'trinity-large-400b'
+// Training phases each example preset provides a configuration for.
+export type ExamplePhaseId = 'pretraining' | 'long-context'
+// Ids of the selectable GPU presets (keys of GPU_PRESETS).
+export type GpuPresetId = 'a100-80gb' | 'h100-sxm' | 'b300' | 'gb200'
+// Ids of the canned scenarios (keys of SCENARIOS).
+export type WorkbenchScenarioId =
+  | 'default'
+  | 'olmo-pretraining'
+  | 'olmo-long-context'
+  | 'llama-pretraining'
+  | 'llama-long-context'
+  | 'trinity-pretraining'
+  | 'trinity-long-context'
+  | 'infeasible-memory'
+/** Complete workbench state: which example it started from plus the four config sections. */
+export type WorkbenchConfig = {
+  examplePresetId: ExamplePresetId
+  phaseId: ExamplePhaseId
+  // false for configs built straight from a preset; applyGpuPreset sets it to true.
+  customized: boolean
+  model: ModelConfig
+  training: TrainingConfig
+  cluster: ClusterConfig
+  parallelism: ParallelismConfig
+}
+// Per-phase settings of an example preset (the model itself lives on the preset).
+type ExamplePhaseConfig = {
+  cluster: ClusterConfig
+  training: TrainingConfig
+  parallelism: ParallelismConfig
+}
+// An example preset: display label, model factory and one config per phase.
+type ExamplePreset = {
+  label: string
+  model: () => ModelConfig
+  phases: Record<ExamplePhaseId, ExamplePhaseConfig>
+}
+// Selectable GPU presets: a display label and a factory producing the GPU spec.
+// `spec` is stored as a factory, so each lookup can hand out a fresh GPUSpec object.
+const GPU_PRESETS: Record<GpuPresetId, { label: string; spec: () => GPUSpec }> = {
+  'a100-80gb': {
+    label: 'A100 80GB',
+    spec: a100_80gb,
+  },
+  'h100-sxm': {
+    label: 'H100 SXM',
+    spec: h100_sxm,
+  },
+  b300: {
+    label: 'B300',
+    spec: b300,
+  },
+  gb200: {
+    label: 'GB200',
+    spec: gb200,
+  },
+}
+/**
+ * Reports whether two GPU specs agree on name, HBM capacity, peak BF16 TFLOPs
+ * and memory bandwidth (strict equality on each field).
+ */
+const gpuPresetMatches = (candidate: GPUSpec, preset: GPUSpec) => {
+  const comparedFields = ['name', 'hbmCapacityGB', 'peakTFLOPsBF16', 'memBandwidthTBs'] as const
+  return comparedFields.every((field) => candidate[field] === preset[field])
+}
+/**
+ * Builds an H100 SXM cluster of 8-GPU nodes with fixed bandwidths of
+ * 900 GB/s inside a node and 50 GB/s between nodes.
+ * @param numNodes Number of nodes in the cluster.
+ * @param nodesPerRack Number of nodes grouped into one rack.
+ */
+const h100Cluster = (numNodes: number, nodesPerRack: number): ClusterConfig => {
+  const cluster: ClusterConfig = {
+    gpuType: h100_sxm(),
+    gpusPerNode: 8,
+    numNodes,
+    intraNodeBandwidthGBs: 900,
+    interNodeBandwidthGBs: 50,
+    nodesPerRack,
+    rackLabel: 'rack',
+    nodeLabel: 'GPU host',
+    podLabel: 'rack',
+  }
+  return cluster
+}
+/**
+ * Builds a B300 cluster of 8-GPU nodes with fixed bandwidths of
+ * 900 GB/s inside a node and 50 GB/s between nodes.
+ * @param numNodes Number of nodes in the cluster.
+ * @param nodesPerRack Number of nodes grouped into one rack.
+ */
+const b300Cluster = (numNodes: number, nodesPerRack: number): ClusterConfig => {
+  const cluster: ClusterConfig = {
+    gpuType: b300(),
+    gpusPerNode: 8,
+    numNodes,
+    intraNodeBandwidthGBs: 900,
+    interNodeBandwidthGBs: 50,
+    nodesPerRack,
+    rackLabel: 'rack',
+    nodeLabel: 'GPU host',
+    podLabel: 'rack',
+  }
+  return cluster
+}
+/**
+ * Built-in example runs keyed by preset id. Each preset carries a display
+ * label, a model factory, and a cluster/training/parallelism configuration
+ * for both the 'pretraining' and the 'long-context' phase.
+ */
+export const EXAMPLE_PRESETS: Record<ExamplePresetId, ExamplePreset> = (() => {
+  // Every example trains in bf16 with micro-batch size 1 and activation
+  // checkpointing on; only these three values vary between phases.
+  const trainingFor = (
+    seqLength: number,
+    gradAccumSteps: number,
+    optimizer: TrainingConfig['optimizer'],
+  ): TrainingConfig => ({
+    microBatchSize: 1,
+    seqLength,
+    gradAccumSteps,
+    precision: 'bf16',
+    activationCheckpointing: true,
+    optimizer,
+  })
+  // Every example enables the distributed optimizer; the degrees, FSDP shard
+  // group size and ZeRO stage are set per phase.
+  const layoutFor = (
+    { tp, pp, cp, ep }: Pick<ParallelismConfig, 'tp' | 'pp' | 'cp' | 'ep'>,
+    fsdpShardGroupSize: number,
+    zeroStage: ParallelismConfig['zeroStage'],
+  ): ParallelismConfig => ({
+    tp,
+    pp,
+    cp,
+    ep,
+    distributedOptimizer: true,
+    fsdpShardGroupSize,
+    zeroStage,
+  })
+  return {
+    'olmo3-32b': {
+      label: 'OLMo 3 32B',
+      model: olmo3_32B,
+      phases: {
+        pretraining: {
+          cluster: h100Cluster(128, 16),
+          training: trainingFor(8192, 1, 'adamw'),
+          parallelism: layoutFor({ tp: 1, pp: 1, cp: 1, ep: 1 }, 256, 3),
+        },
+        'long-context': {
+          cluster: h100Cluster(32, 8),
+          training: trainingFor(65536, 1, 'adamw'),
+          parallelism: layoutFor({ tp: 1, pp: 1, cp: 8, ep: 1 }, 256, 3),
+        },
+      },
+    },
+    'llama31-405b': {
+      label: 'Llama 3.1 405B',
+      model: llama31_405B,
+      phases: {
+        pretraining: {
+          cluster: h100Cluster(2048, 16),
+          training: trainingFor(8192, 16, 'adamw'),
+          parallelism: layoutFor({ tp: 8, pp: 16, cp: 1, ep: 1 }, 0, 1),
+        },
+        'long-context': {
+          cluster: h100Cluster(2048, 16),
+          training: trainingFor(131072, 1, 'adamw'),
+          parallelism: layoutFor({ tp: 8, pp: 16, cp: 16, ep: 1 }, 0, 1),
+        },
+      },
+    },
+    'trinity-large-400b': {
+      label: 'Trinity Large 400B',
+      model: trinityLarge400B,
+      phases: {
+        pretraining: {
+          cluster: b300Cluster(256, 9),
+          training: trainingFor(8192, 8, 'muon'),
+          parallelism: layoutFor({ tp: 1, pp: 1, cp: 1, ep: 8 }, 128, 3),
+        },
+        'long-context': {
+          cluster: b300Cluster(256, 9),
+          training: trainingFor(262144, 1, 'muon'),
+          parallelism: layoutFor({ tp: 1, pp: 1, cp: 4, ep: 8 }, 128, 3),
+        },
+      },
+    },
+  }
+})()
+/**
+ * Builds a pristine (customized: false) workbench config for one phase of an
+ * example preset.
+ *
+ * The result shares no mutable state with EXAMPLE_PRESETS: the model comes
+ * from the preset's factory, and the training, cluster and parallelism
+ * sections are copied, including the GPU spec nested inside the cluster.
+ *
+ * @param examplePresetId Key into EXAMPLE_PRESETS.
+ * @param phaseId Which of the preset's phases to use.
+ */
+const createWorkbenchConfig = (
+  examplePresetId: ExamplePresetId,
+  phaseId: ExamplePhaseId,
+): WorkbenchConfig => {
+  const preset = EXAMPLE_PRESETS[examplePresetId]
+  const phase = preset.phases[phaseId]
+  return {
+    examplePresetId,
+    phaseId,
+    customized: false,
+    model: preset.model(),
+    training: { ...phase.training },
+    // A plain spread would leave gpuType aliased to the preset's object, so an
+    // in-place edit of the returned GPU spec would leak into EXAMPLE_PRESETS.
+    cluster: { ...phase.cluster, gpuType: { ...phase.cluster.gpuType } },
+    parallelism: { ...phase.parallelism },
+  }
+}
+// Canned workbench configurations addressable by scenario id.
+// These objects are module-level singletons; getScenarioWorkbenchConfig hands
+// out clones rather than the stored objects.
+const SCENARIOS: Record<WorkbenchScenarioId, WorkbenchConfig> = {
+  // 'default' is the same configuration as 'olmo-pretraining'.
+  default: createWorkbenchConfig('olmo3-32b', 'pretraining'),
+  'olmo-pretraining': createWorkbenchConfig('olmo3-32b', 'pretraining'),
+  'olmo-long-context': createWorkbenchConfig('olmo3-32b', 'long-context'),
+  'llama-pretraining': createWorkbenchConfig('llama31-405b', 'pretraining'),
+  'llama-long-context': createWorkbenchConfig('llama31-405b', 'long-context'),
+  'trinity-pretraining': createWorkbenchConfig('trinity-large-400b', 'pretraining'),
+  'trinity-long-context': createWorkbenchConfig('trinity-large-400b', 'long-context'),
+  // Hand-written scenario: the 405B model on 8 H100 nodes with TP=8 only,
+  // no distributed optimizer, no FSDP sharding and ZeRO stage 0.
+  // Presumably meant to exceed per-GPU HBM, going by the id — verify against the analysis.
+  'infeasible-memory': {
+    examplePresetId: 'llama31-405b',
+    phaseId: 'pretraining',
+    customized: false,
+    model: llama31_405B(),
+    training: {
+      microBatchSize: 1,
+      seqLength: 8192,
+      gradAccumSteps: 1,
+      precision: 'bf16',
+      activationCheckpointing: true,
+      optimizer: 'adamw',
+    },
+    cluster: h100Cluster(8, 4),
+    parallelism: {
+      tp: 8,
+      pp: 1,
+      cp: 1,
+      ep: 1,
+      distributedOptimizer: false,
+      fsdpShardGroupSize: 0,
+      zeroStage: 0,
+    },
+  },
+}
+/**
+ * Copies a model config, duplicating the nested attentionProfile and moe
+ * objects when present. Absent nested objects are written out as explicit
+ * `undefined` properties on the copy.
+ */
+const cloneModel = (model: ModelConfig): ModelConfig => {
+  const { attentionProfile, moe } = model
+  return {
+    ...model,
+    attentionProfile: attentionProfile ? { ...attentionProfile } : undefined,
+    moe: moe ? { ...moe } : undefined,
+  }
+}
+/** Returns a shallow copy of a training config. */
+const cloneTraining = (training: TrainingConfig): TrainingConfig => {
+  const copy: TrainingConfig = { ...training }
+  return copy
+}
+/**
+ * Returns an independent copy of a cluster config.
+ * The nested gpuType spec is copied as well; a plain spread would leave the
+ * clone sharing that object with its source, so editing the clone's GPU spec
+ * in place would also change the original.
+ */
+const cloneCluster = (cluster: ClusterConfig): ClusterConfig => ({
+  ...cluster,
+  gpuType: { ...cluster.gpuType },
+})
+/** Returns a shallow copy of a parallelism config. */
+const cloneParallelism = (parallelism: ParallelismConfig): ParallelismConfig => {
+  const copy: ParallelismConfig = { ...parallelism }
+  return copy
+}
+/**
+ * Copies a workbench config: the three scalar fields are carried over and each
+ * of the four sections is duplicated with its dedicated clone helper.
+ */
+export const cloneWorkbenchConfig = (config: WorkbenchConfig): WorkbenchConfig => {
+  const { examplePresetId, phaseId, customized } = config
+  return {
+    examplePresetId,
+    phaseId,
+    customized,
+    model: cloneModel(config.model),
+    training: cloneTraining(config.training),
+    cluster: cloneCluster(config.cluster),
+    parallelism: cloneParallelism(config.parallelism),
+  }
+}
+/**
+ * Returns a clone of the stored configuration for a scenario, so callers never
+ * receive the object held in SCENARIOS itself.
+ */
+export function getScenarioWorkbenchConfig(scenario: WorkbenchScenarioId) {
+  const stored = SCENARIOS[scenario]
+  return cloneWorkbenchConfig(stored)
+}
+/**
+ * Lists the example presets as `{ id, label }` options, in EXAMPLE_PRESETS
+ * order. The 'llama31-405b' preset is left out of the list.
+ */
+export function getExamplePresetOptions() {
+  const options: Array<{ id: ExamplePresetId; label: string }> = []
+  for (const [id, preset] of Object.entries(EXAMPLE_PRESETS)) {
+    if (id === 'llama31-405b') {
+      continue
+    }
+    options.push({ id: id as ExamplePresetId, label: preset.label })
+  }
+  return options
+}
+/**
+ * Lists the phases of an example preset as `{ id, label }` options.
+ * 'pretraining' is labelled 'Pretraining'; every other phase id is labelled
+ * 'Long-context'.
+ */
+export function getPhaseOptions(examplePresetId: ExamplePresetId) {
+  const phaseIds = Object.keys(EXAMPLE_PRESETS[examplePresetId].phases)
+  return phaseIds.map((phaseId) => {
+    const label = phaseId === 'pretraining' ? 'Pretraining' : 'Long-context'
+    return { id: phaseId as ExamplePhaseId, label }
+  })
+}
+/** Returns the display label of an example preset. */
+export function getExampleLabel(examplePresetId: ExamplePresetId) {
+  const { label } = EXAMPLE_PRESETS[examplePresetId]
+  return label
+}
+/** Lists the GPU presets as `{ id, label }` options, in GPU_PRESETS order. */
+export function getGpuPresetOptions() {
+  const options: Array<{ id: GpuPresetId; label: string }> = []
+  for (const [id, preset] of Object.entries(GPU_PRESETS)) {
+    options.push({ id: id as GpuPresetId, label: preset.label })
+  }
+  return options
+}
+/**
+ * Finds which GPU preset a spec corresponds to.
+ * @returns The id of the first preset whose spec matches per gpuPresetMatches,
+ *   or 'custom' when none does.
+ */
+export function getGpuPresetId(gpuType: GPUSpec): GpuPresetId | 'custom' {
+  const match = Object.entries(GPU_PRESETS).find(([, preset]) =>
+    gpuPresetMatches(gpuType, preset.spec()),
+  )
+  return match ? (match[0] as GpuPresetId) : 'custom'
+}
+/**
+ * Returns a copy of `config` whose cluster uses the chosen GPU preset and
+ * whose `customized` flag is set. Only `cluster.gpuType` is replaced; the
+ * other cluster fields are carried over and the input is not modified.
+ */
+export function applyGpuPreset(config: WorkbenchConfig, gpuPresetId: GpuPresetId): WorkbenchConfig {
+  const cluster = {
+    ...config.cluster,
+    gpuType: GPU_PRESETS[gpuPresetId].spec(),
+  }
+  return { ...config, customized: true, cluster }
+}
+/**
+ * Switches to another example preset. The current config is ignored: the
+ * result is a fresh config for the preset's 'pretraining' phase.
+ */
+export function applyExamplePreset(
+  _config: WorkbenchConfig,
+  examplePresetId: ExamplePresetId,
+): WorkbenchConfig {
+  const initialPhase: ExamplePhaseId = 'pretraining'
+  return createWorkbenchConfig(examplePresetId, initialPhase)
+}
+/**
+ * Switches the phase of the current example. Only `config.examplePresetId` is
+ * read; the result is a fresh config for that preset and the given phase.
+ */
+export function applyExamplePhase(
+  config: WorkbenchConfig,
+  phaseId: ExamplePhaseId,
+): WorkbenchConfig {
+  const { examplePresetId } = config
+  return createWorkbenchConfig(examplePresetId, phaseId)
+}
+/**
+ * Lists the selectable values for a degree that must divide `total`.
+ * @param total Number whose positive integer divisors are offered.
+ * @param currentValue Value currently selected; always included, even when it
+ *   does not divide `total`.
+ * @returns The distinct values sorted in ascending numeric order.
+ */
+export function getFactorOptions(total: number, currentValue: number) {
+  // Seed with the current value so it stays selectable.
+  const options = new Set<number>([currentValue])
+  let divisor = 1
+  while (divisor <= total) {
+    if (total % divisor === 0) {
+      options.add(divisor)
+    }
+    divisor += 1
+  }
+  const ordered = Array.from(options)
+  ordered.sort((a, b) => a - b)
+  return ordered
+}

src/lib/workbenchPresenter.ts ADDED Viewed

	@@ -0,0 +1,220 @@

+import { type ClusterAnalysis } from './trainingClusterModel'
+import { getExampleLabel, type WorkbenchConfig } from './workbench'
+/** Display-ready data for the workbench, produced by buildWorkbenchViewModel. */
+export type WorkbenchViewModel = {
+  config: WorkbenchConfig
+  analysis: ClusterAnalysis
+  // True when the analysis is infeasible and reports a total step time of 0.
+  structuralIssue: boolean
+  // Human-readable warning sentences collected while building the view model.
+  warnings: string[]
+  headline: string
+  subheadline: string
+  // Label/note text pairs for the summary; presumably one pair per summary card — verify against the UI.
+  summary: {
+    throughputLabel: string
+    throughputNote: string
+    gpuLabel: string
+    gpuNote: string
+    interconnectLabel: string
+    interconnectNote: string
+    bottleneckLabel: string
+    bottleneckNote: string
+  }
+  facts: Array<{ label: string; value: string }>
+}
+/** Rounds to the nearest integer and formats it with the runtime's default locale. */
+const formatInteger = (value: number) => {
+  const rounded = Math.round(value)
+  return rounded.toLocaleString()
+}
+/** Formats a fraction (1 means 100%) as a whole-number percentage string. */
+const formatPercent = (value: number) => {
+  const wholePercent = Math.round(value * 100)
+  return `${wholePercent}%`
+}
+/** Formats a value already expressed in percent, rounded to a whole number. */
+const formatPercentWhole = (value: number) => {
+  const rounded = Math.round(value)
+  return `${rounded}%`
+}
+/** Formats a size in GB: no decimals from 100 upwards, one decimal below that. */
+const formatGB = (value: number) => {
+  const fractionDigits = value >= 100 ? 0 : 1
+  return `${value.toFixed(fractionDigits)} GB`
+}
+/** Counts the GPUs in the analysis map that report memory use above zero. */
+const getAllocatedGpuCount = (analysis: ClusterAnalysis) =>
+  analysis.gpuMap.reduce((count, gpu) => (gpu.memoryUsedGB > 0 ? count + 1 : count), 0)
+/** Counts the GPUs in the analysis map whose isActive flag is truthy. */
+const getActiveGpuCount = (analysis: ClusterAnalysis) =>
+  analysis.gpuMap.reduce((count, gpu) => (gpu.isActive ? count + 1 : count), 0)
+/**
+ * Names the largest time contributor among the six communication categories
+ * and the pipeline bubble. The stable sort means ties go to the entry listed
+ * first.
+ */
+const getDominantCommLabel = (analysis: ClusterAnalysis) => {
+  const { communication, throughput } = analysis
+  const candidates = [
+    ['TP collectives', communication.tp.timePerStepMs],
+    ['PP activations', communication.pp.timePerStepMs],
+    ['CP sequence exchange', communication.cp.timePerStepMs],
+    ['FSDP sharding', communication.fsdp.timePerStepMs],
+    ['EP routing', communication.ep.timePerStepMs],
+    ['DP sync', communication.dp.allReduceTimeMs],
+    ['Pipeline bubble', throughput.pipelineBubbleTimeMs],
+  ] as const
+  // Sort a copy in descending time order and take the label of the front entry.
+  const ranked = candidates.slice().sort(([, leftMs], [, rightMs]) => rightMs - leftMs)
+  const [dominantLabel] = ranked[0]
+  return dominantLabel
+}
+/**
+ * Assembles the presentation view model for one workbench configuration and its analysis.
+ *
+ * Derives GPU counts, collects warnings, computes the peak link utilization, and pre-formats
+ * the headline, summary slots, and fact rows.
+ *
+ * @param config Workbench configuration the analysis was computed for.
+ * @param analysis Analysis result for that configuration.
+ * @returns A view model carrying both inputs plus the formatted strings.
+ */
+export function buildWorkbenchViewModel(
+  config: WorkbenchConfig,
+  analysis: ClusterAnalysis,
+): WorkbenchViewModel {
+  // Ranks requested by the layout: the product of every parallelism dimension.
+  const requestedGpuCount =
+    config.parallelism.tp *
+    config.parallelism.pp *
+    config.parallelism.cp *
+    config.parallelism.ep *
+    analysis.derivedParallelism.dp
+  const allocatedGpuCount = getAllocatedGpuCount(analysis)
+  const activeGpuCount = getActiveGpuCount(analysis)
+  const totalGPUs = analysis.totalGPUs
+  // Nothing counts as launched when no step time was estimated.
+  const launchedGpuCount =
+    analysis.throughput.totalStepTimeMs > 0 ? Math.min(requestedGpuCount, totalGPUs) : 0
+  const darkGpuCount = Math.max(totalGPUs - launchedGpuCount, 0)
+  // A missing, zero, or negative rack size falls back to one rack holding every node, so the
+  // division below can never produce Infinity or NaN in the subheadline.
+  const configuredNodesPerRack = config.cluster.nodesPerRack ?? config.cluster.numNodes
+  const nodesPerRack =
+    configuredNodesPerRack > 0 ? configuredNodesPerRack : config.cluster.numNodes
+  const rackCount = nodesPerRack > 0 ? Math.ceil(config.cluster.numNodes / nodesPerRack) : 0
+  const rackLabel = config.cluster.rackLabel ?? 'rack'
+  const nodeLabel = config.cluster.nodeLabel ?? 'node'
+  // Infeasible with a zero step time is treated as a structural problem; infeasible with a
+  // non-zero step time is treated as a memory overflow (see the warnings below).
+  const structuralIssue = !analysis.feasible && analysis.throughput.totalStepTimeMs === 0
+  const warnings: string[] = []
+  if (!analysis.feasible && analysis.infeasibilityReason) {
+    warnings.push(analysis.infeasibilityReason)
+  }
+  if (structuralIssue) {
+    warnings.push('This layout is structurally invalid, so throughput and communication are not estimated.')
+  } else if (!analysis.feasible) {
+    warnings.push('The run is memory-infeasible, but the app still shows the attempted placement and estimated traffic.')
+  }
+  if (analysis.memoryBreakdown.utilizationPercent >= 92) {
+    warnings.push(
+      `Worst-case GPU HBM is ${formatPercentWhole(analysis.memoryBreakdown.utilizationPercent)} full.`,
+    )
+  }
+  if (analysis.throughput.pipelineBubbleFraction >= 0.18) {
+    warnings.push(
+      `Pipeline bubble is ${formatPercent(analysis.throughput.pipelineBubbleFraction)} of step time.`,
+    )
+  }
+  if (config.parallelism.cp > 1) {
+    warnings.push(
+      `CP shards each micro-batch into ${config.parallelism.cp} sequence slices and adds sequence exchange traffic.`,
+    )
+  }
+  if (config.parallelism.fsdpShardGroupSize > 1) {
+    warnings.push(
+      `HSDP shards weights across ${config.parallelism.fsdpShardGroupSize.toLocaleString()}-GPU groups, with ${analysis.derivedParallelism.replicaGroups} replica groups syncing once per step.`,
+    )
+  }
+  if (config.parallelism.ep > 1) {
+    warnings.push(
+      `EP routes tokens across ${config.parallelism.ep} expert lanes and adds expert all-to-all traffic.`,
+    )
+  }
+  if (!structuralIssue && darkGpuCount > 0) {
+    warnings.push(
+      `${darkGpuCount.toLocaleString()} GPUs are dark because this launch only uses ${launchedGpuCount.toLocaleString()} ranks.`,
+    )
+  }
+  const throughputLabel = structuralIssue
+    ? 'n/a'
+    : formatInteger(analysis.throughput.tokensPerSecond)
+  const throughputNote = structuralIssue
+    ? 'structural constraint violated'
+    : !analysis.feasible
+      ? 'estimated despite HBM overflow'
+      : 'tokens / second'
+  // PP exposes no utilization figure here, so it is derived from volume, inter-node bandwidth
+  // and step time. A zero step time falls back to one second to avoid dividing by zero.
+  const ppComm = analysis.communication.pp
+  const stepTimeSeconds = analysis.throughput.totalStepTimeMs / 1000 || 1
+  const ppLinkUtilizationPercent =
+    ppComm.usesInterNode && ppComm.timePerStepMs > 0 && ppComm.totalVolumePerStepGB > 0
+      ? Math.min(
+          100,
+          (ppComm.totalVolumePerStepGB /
+            (config.cluster.interNodeBandwidthGBs * stepTimeSeconds)) *
+            100,
+        )
+      : 0
+  const interconnectUtilization = Math.max(
+    analysis.communication.tp.linkUtilizationPercent,
+    ppLinkUtilizationPercent,
+    analysis.communication.cp.linkUtilizationPercent,
+    analysis.communication.fsdp.linkUtilizationPercent,
+    analysis.communication.ep.linkUtilizationPercent,
+    analysis.communication.dp.linkUtilizationPercent,
+  )
+  const headlineGpuLabel =
+    structuralIssue || launchedGpuCount === totalGPUs
+      ? `${totalGPUs.toLocaleString()} GPUs`
+      : `${launchedGpuCount.toLocaleString()} of ${totalGPUs.toLocaleString()} GPUs`
+  // A structurally invalid layout is not an HBM problem, so it gets its own bottleneck label
+  // instead of being reported as a memory-capacity limit.
+  const bottleneckLabel = analysis.feasible
+    ? getDominantCommLabel(analysis)
+    : structuralIssue
+      ? 'Invalid layout'
+      : 'HBM capacity'
+  return {
+    config,
+    analysis,
+    structuralIssue,
+    warnings,
+    headline:
+      `${getExampleLabel(config.examplePresetId)}${config.customized ? ' (customized)' : ''} · ` +
+      `${config.phaseId} on ${headlineGpuLabel}`,
+    subheadline:
+      `${formatInteger(analysis.totalParams)} total params, ${formatInteger(analysis.activeParamsPerToken)} active params, ` +
+      `${config.model.numLayers} layers, ` +
+      `${rackCount} ${rackLabel}${rackCount === 1 ? '' : 's'} of ${config.cluster.gpuType.name}.`,
+    summary: {
+      throughputLabel,
+      throughputNote,
+      // Denominator: launched ranks, else allocated GPUs, else the whole cluster.
+      gpuLabel: `${activeGpuCount}/${launchedGpuCount || allocatedGpuCount || totalGPUs}`,
+      gpuNote:
+        structuralIssue
+          ? 'launch invalid'
+          : launchedGpuCount === totalGPUs
+          ? 'active in this placement'
+          : `${launchedGpuCount}/${totalGPUs} launched on this cluster`,
+      interconnectLabel: formatPercentWhole(interconnectUtilization),
+      interconnectNote: 'peak link utilization',
+      bottleneckLabel,
+      bottleneckNote: analysis.feasible
+        ? `${formatGB(analysis.memoryBreakdown.totalGB)} on the hottest GPU`
+        : analysis.infeasibilityReason ?? 'constraint violation',
+    },
+    facts: [
+      {
+        label: 'Model',
+        value: `${formatInteger(analysis.totalParams)} params`,
+      },
+      {
+        label: 'Context',
+        value: `${config.training.seqLength.toLocaleString()} tokens`,
+      },
+      {
+        label: 'Global batch',
+        value: `${analysis.globalBatchSizeTokens.toLocaleString()} tokens / step`,
+      },
+      {
+        label: 'Topology',
+        value: `${config.cluster.numNodes} ${nodeLabel}${config.cluster.numNodes === 1 ? '' : 's'}`,
+      },
+      {
+        label: 'Parallelism',
+        value:
+          `TP ${config.parallelism.tp} · PP ${config.parallelism.pp} · ` +
+          `CP ${config.parallelism.cp} · EP ${config.parallelism.ep} · DP ${analysis.derivedParallelism.dp}`,
+      },
+      {
+        label: 'Replica groups',
+        value: `${analysis.derivedParallelism.replicaGroups} groups`,
+      },
+      {
+        label: 'FSDP group',
+        value:
+          config.parallelism.fsdpShardGroupSize > 1
+            ? `${config.parallelism.fsdpShardGroupSize.toLocaleString()} GPUs`
+            : 'disabled',
+      },
+      {
+        // Negative when the hottest GPU needs more memory than its HBM capacity.
+        label: 'HBM headroom',
+        value: `${formatGB(config.cluster.gpuType.hbmCapacityGB - analysis.memoryBreakdown.totalGB)}`,
+      },
+    ],
+  }
+}

src/main.tsx ADDED Viewed

	@@ -0,0 +1,15 @@

+import { StrictMode } from 'react'
+import { createRoot } from 'react-dom/client'
+import '@fontsource/space-grotesk/400.css'
+import '@fontsource/space-grotesk/500.css'
+import '@fontsource/space-grotesk/700.css'
+import '@fontsource/ibm-plex-mono/400.css'
+import '@fontsource/ibm-plex-mono/500.css'
+import './index.css'
+import App from './App.tsx'
+// Mount <App /> into the #root element, wrapped in React StrictMode.
+const rootElement = document.getElementById('root')!
+const root = createRoot(rootElement)
+root.render(
+  <StrictMode>
+    <App />
+  </StrictMode>,
+)

src/types/global.d.ts ADDED Viewed

	@@ -0,0 +1,49 @@

+export {}
+// Ambient additions to the global scope: the topology debug state type and the window
+// properties that carry it.
+declare global {
+  // Shape of the debug snapshot exposed as `window.__TOPOLOGY_DEBUG__`.
+  type TopologyDebugState = {
+    ready: boolean
+    // Viewport translation (x, y) and zoom scale.
+    viewport: {
+      x: number
+      y: number
+      scale: number
+    }
+    surfaceSize: {
+      width: number
+      height: number
+    }
+    objectCounts: {
+      pods: number
+      nodes: number
+      gpus: number
+      links: number
+      activeGpus: number
+      contextualNodes: number
+    }
+    // Rectangles keyed by object id.
+    // NOTE(review): coordinate space is not stated here — confirm against the code that fills it.
+    objects: Record<
+      string,
+      {
+        x: number
+        y: number
+        width: number
+        height: number
+      }
+    >
+    // Currently hovered object, or null when there is none.
+    hoveredTarget: {
+      kind: 'pod' | 'node' | 'gpu' | 'link'
+      id: string
+    } | null
+    // Currently pinned object, or null when there is none.
+    pinnedTarget: {
+      kind: 'pod' | 'node' | 'gpu' | 'link'
+      id: string
+    } | null
+    detailLevel?: 'overview' | 'board' | 'package' | 'silicon' | 'micro'
+    // Optional hook taking a full viewport (x, y, scale).
+    setViewport?: (viewport: { x: number; y: number; scale: number }) => void
+  }
+  interface Window {
+    // Typed as unknown here; presumably renderer application handles — confirm where assigned.
+    __PIXI_TOPOLOGY_APP__?: unknown
+    __PIXI_FLOW_APP__?: unknown
+    __TOPOLOGY_DEBUG__?: TopologyDebugState
+  }
+}

tests/topology.spec.ts ADDED Viewed

	@@ -0,0 +1,234 @@

+import { expect, test, type Page } from '@playwright/test'
+// Local view of `window.__TOPOLOGY_DEBUG__.objects` as these tests read it: one rectangle per
+// object id. The helpers in this file add x/y to the interaction layer's bounding-box origin.
+type DebugObjectMap = Record<
+  string,
+  {
+    x: number
+    y: number
+    width: number
+    height: number
+  }
+>
+// Opens the app in snapshot mode (plus any extra `&key=value` query text), then waits until the
+// topology debug state reports ready with at least one object and the page fonts have loaded.
+async function gotoScenario(page: Page, query = '') {
+  const url = `/?snapshot=1${query}`
+  await page.goto(url)
+  await page.waitForLoadState('networkidle')
+  await page.waitForFunction(() => {
+    const debug = window.__TOPOLOGY_DEBUG__ as
+      | { ready?: boolean; objects?: DebugObjectMap }
+      | undefined
+    if (!debug?.ready || !debug.objects) {
+      return false
+    }
+    return Object.keys(debug.objects).length > 0
+  })
+  await page.evaluate(async () => {
+    await document.fonts.ready
+  })
+}
+// Reads one rectangle from the page's topology debug state; resolves to undefined when the id
+// is not present in the map.
+async function getDebugObject(page: Page, id: string) {
+  return page.evaluate((objectId) => {
+    const { objects } = window.__TOPOLOGY_DEBUG__ as { objects: DebugObjectMap }
+    return objects[objectId]
+  }, id)
+}
+// Returns the first debug object id (in key order) that starts with `prefix`, or null if none does.
+async function getFirstObjectId(page: Page, prefix: string) {
+  return page.evaluate((idPrefix) => {
+    const { objects } = window.__TOPOLOGY_DEBUG__ as { objects: DebugObjectMap }
+    for (const key of Object.keys(objects)) {
+      if (key.startsWith(idPrefix)) {
+        return key
+      }
+    }
+    return null
+  }, prefix)
+}
+// Scrolls the topology interaction layer into view and returns its bounding box; throws when
+// the layer has no box.
+async function getSurfaceOffset(page: Page) {
+  const interactionLayer = page.getByTestId('topology-interaction-layer')
+  await interactionLayer.scrollIntoViewIfNeeded()
+  const bounds = await interactionLayer.boundingBox()
+  if (bounds) {
+    return bounds
+  }
+  throw new Error('missing topology interaction layer')
+}
+// Returns the page-space centre of a debug object: the interaction layer's origin plus the
+// object's rectangle centre. Throws a descriptive error when the id is not in the debug map,
+// instead of failing later with an opaque property access on undefined.
+async function objectCenter(page: Page, id: string) {
+  const object = await getDebugObject(page, id)
+  if (!object) {
+    throw new Error(`missing topology debug object: ${id}`)
+  }
+  const surface = await getSurfaceOffset(page)
+  return {
+    x: surface.x + object.x + object.width / 2,
+    y: surface.y + object.y + object.height / 2,
+  }
+}
+// Baseline screenshots of the topology scene: one test per scenario, declared from a table.
+// Titles, query strings, and snapshot file names are listed explicitly for each case.
+const sceneScreenshotCases = [
+  { title: 'default scenario screenshot', query: '', snapshot: 'topology-default.png' },
+  {
+    title: 'olmo pretraining screenshot',
+    query: '&scenario=olmo-pretraining',
+    snapshot: 'topology-olmo-pretraining.png',
+  },
+  {
+    title: 'llama pretraining screenshot',
+    query: '&scenario=llama-pretraining',
+    snapshot: 'topology-llama-pretraining.png',
+  },
+  {
+    title: 'trinity pretraining screenshot',
+    query: '&scenario=trinity-pretraining',
+    snapshot: 'topology-trinity-pretraining.png',
+  },
+  {
+    title: 'olmo long-context screenshot',
+    query: '&scenario=olmo-long-context',
+    snapshot: 'topology-olmo-long-context.png',
+  },
+  {
+    title: 'trinity long-context screenshot',
+    query: '&scenario=trinity-long-context',
+    snapshot: 'topology-trinity-long-context.png',
+  },
+] as const
+for (const { title, query, snapshot } of sceneScreenshotCases) {
+  test(title, async ({ page }) => {
+    await gotoScenario(page, query)
+    await expect(page.getByTestId('topology-scene')).toHaveScreenshot(snapshot)
+  })
+}
+// The infeasible-memory scenario must show its banner before the scene is captured.
+test('infeasible memory screenshot', async ({ page }) => {
+  await gotoScenario(page, '&scenario=infeasible-memory')
+  const banner = page.getByTestId('infeasible-banner')
+  const scene = page.getByTestId('topology-scene')
+  await expect(banner).toBeVisible()
+  await expect(scene).toHaveScreenshot('topology-infeasible-memory.png')
+})
+// Hovers the first node object and captures the scene once the debug state reports it hovered.
+test('hover highlight screenshot', async ({ page }) => {
+  await gotoScenario(page, '&scenario=olmo-pretraining')
+  const nodeId = await getFirstObjectId(page, 'node-')
+  if (!nodeId) {
+    throw new Error('missing visible node object')
+  }
+  const nodeRect = await getDebugObject(page, nodeId)
+  const surface = await getSurfaceOffset(page)
+  // Aim 6px inside the node's top-left corner rather than at its centre.
+  const inset = 6
+  const hoverX = surface.x + nodeRect.x + inset
+  const hoverY = surface.y + nodeRect.y + inset
+  await page.mouse.move(hoverX, hoverY)
+  await page.waitForFunction((expectedId) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { hoveredTarget?: { id: string } | null }
+    return debug.hoveredTarget?.id === expectedId
+  }, nodeId)
+  const inspector = page.getByTestId('topology-inspector')
+  const scene = page.getByTestId('topology-scene')
+  await expect(inspector).toContainText(/host/i)
+  await expect(scene).toHaveScreenshot('topology-hover-node.png')
+})
+// Clicks the first GPU object and captures the scene once the debug state reports it pinned.
+test('pinned inspector screenshot', async ({ page }) => {
+  await gotoScenario(page, '&scenario=olmo-pretraining')
+  const gpuId = await getFirstObjectId(page, 'gpu-')
+  if (!gpuId) {
+    throw new Error('missing visible gpu object')
+  }
+  const { x: clickX, y: clickY } = await objectCenter(page, gpuId)
+  await page.mouse.click(clickX, clickY)
+  await page.waitForFunction((expectedId) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
+    return debug.pinnedTarget?.id === expectedId
+  }, gpuId)
+  const inspector = page.getByTestId('topology-inspector')
+  const scene = page.getByTestId('topology-scene')
+  await expect(inspector).toContainText('GPU')
+  await expect(scene).toHaveScreenshot('topology-pinned-gpu.png')
+})
+// With debug=1 the overlay must be visible; the capture targets the interaction layer.
+test('debug overlay screenshot', async ({ page }) => {
+  await gotoScenario(page, '&debug=1')
+  const overlay = page.getByTestId('topology-debug')
+  const interactionLayer = page.getByTestId('topology-interaction-layer')
+  await expect(overlay).toBeVisible()
+  await expect(interactionLayer).toHaveScreenshot('topology-debug-overlay.png')
+})
+// End-to-end camera check: wheel-zoom over the layer centre, drag to pan, then reset.
+// Each step waits on the debug viewport before asserting, so the order of statements matters.
+test('supports zoom pan and reset camera', async ({ page }) => {
+  await gotoScenario(page)
+  const layer = page.getByTestId('topology-interaction-layer')
+  await layer.scrollIntoViewIfNeeded()
+  // Baseline viewport and page scroll position, captured before any interaction.
+  const before = await page.evaluate(() => {
+    return (window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number; x: number } }).viewport
+  })
+  const scrollBefore = await page.evaluate(() => window.scrollY)
+  const box = await layer.boundingBox()
+  if (!box) {
+    throw new Error('missing interaction layer bounds')
+  }
+  // Wheel with a negative deltaY over the layer centre and wait for the scale to grow.
+  await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2)
+  await page.mouse.wheel(0, -320)
+  await page.waitForFunction((scale) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number } }
+    return debug.viewport.scale > scale
+  }, before.scale)
+  const afterZoom = await page.evaluate(() => {
+    return (window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number; x: number } }).viewport
+  })
+  expect(afterZoom.scale).toBeGreaterThan(before.scale)
+  // The wheel gesture must not have scrolled the page itself.
+  expect(await page.evaluate(() => window.scrollY)).toBe(scrollBefore)
+  // Drag from the centre by (+80, +60) in six steps and wait for viewport.x to change.
+  await page.mouse.down()
+  await page.mouse.move(box.x + box.width / 2 + 80, box.y + box.height / 2 + 60, {
+    steps: 6,
+  })
+  await page.mouse.up()
+  await page.waitForFunction((x) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { viewport: { x: number } }
+    return debug.viewport.x !== x
+  }, afterZoom.x)
+  const afterPan = await page.evaluate(() => {
+    return (window.__TOPOLOGY_DEBUG__ as { viewport: { x: number } }).viewport
+  })
+  expect(afterPan.x).not.toBe(afterZoom.x)
+  // Reset the camera and expect the scale to return to within 0.01 of the baseline.
+  await page.getByTestId('camera-reset').click()
+  await page.waitForFunction((scale) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number } }
+    return Math.abs(debug.viewport.scale - scale) < 0.01
+  }, before.scale)
+  const afterReset = await page.evaluate(() => {
+    return (window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number } }).viewport
+  })
+  expect(Math.abs(afterReset.scale - before.scale)).toBeLessThan(0.01)
+})
+// Clicking a GPU pins it; clicking the same spot again clears the pin, after which the
+// inspector shows the hover view for the GPU still under the pointer.
+test('supports pin and unpin via click', async ({ page }) => {
+  await gotoScenario(page, '&scenario=olmo-pretraining')
+  const gpuId = await getFirstObjectId(page, 'gpu-')
+  if (!gpuId) {
+    throw new Error('missing visible gpu object')
+  }
+  const { x: clickX, y: clickY } = await objectCenter(page, gpuId)
+  const inspector = page.getByTestId('topology-inspector')
+  // First click: pin.
+  await page.mouse.click(clickX, clickY)
+  await page.waitForFunction((expectedId) => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
+    return debug.pinnedTarget?.id === expectedId
+  }, gpuId)
+  await expect(inspector).toContainText('GPU')
+  // Second click on the same point: unpin.
+  await page.mouse.click(clickX, clickY)
+  await page.waitForFunction(() => {
+    const debug = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
+    return debug.pinnedTarget == null
+  })
+  await expect(inspector).toContainText('Hover target')
+  await expect(inspector).toContainText('GPU')
+})
+// Editing a model field marks the preset as customized; editing the node count updates the
+// cluster GPU total shown on the page.
+test('supports manual model and cluster edits beyond the example presets', async ({ page }) => {
+  await gotoScenario(page, '&scenario=olmo-pretraining')
+  const hiddenDimInput = page.getByLabel('Hidden dim')
+  await hiddenDimInput.fill('6144')
+  const customizedBadge = page.locator('.control-badge', { hasText: 'customized' })
+  await expect(customizedBadge).toBeVisible()
+  await expect(page.getByText(/hidden 6,144/i)).toBeVisible()
+  const nodesInput = page.getByRole('spinbutton', { name: 'Nodes', exact: true })
+  await nodesInput.fill('64')
+  await expect(page.getByText('512 GPUs in cluster')).toBeVisible()
+})

tests/topologyLod.test.ts ADDED Viewed

	@@ -0,0 +1,49 @@

+import { describe, expect, it } from 'vitest'
+import { getTopologyLodState } from '../src/lib/topologyLod'
+// Unit tests for getTopologyLodState: band selection, cross-fade weights, and deep isolation
+// at sampled zoom scales.
+describe('topology lod policy', () => {
+  it('keeps overview strongest at cluster-scale zoom', () => {
+    const { primaryBand, weights } = getTopologyLodState(0.05)
+    expect(primaryBand).toBe('overview')
+    expect(weights.overview).toBe(1)
+    expect(weights.board).toBe(0)
+  })
+  it('cross-fades only between adjacent detail bands', () => {
+    // Three sampled scales: 2.8, 6.5 and 40.
+    const transitionWeights = getTopologyLodState(2.8).weights
+    const packageWeights = getTopologyLodState(6.5).weights
+    const siliconState = getTopologyLodState(40)
+    expect(transitionWeights.board).toBeGreaterThan(0)
+    expect(transitionWeights.package).toBeGreaterThan(0)
+    expect(transitionWeights.silicon).toBe(0)
+    expect(packageWeights.package).toBeGreaterThan(0.4)
+    expect(packageWeights.board).toBe(0)
+    expect(siliconState.primaryBand).toBe('silicon')
+    expect(siliconState.weights.package).toBe(0)
+  })
+  it('activates deep isolation only at extreme gpu zoom', () => {
+    const atScale4 = getTopologyLodState(4)
+    const atScale140 = getTopologyLodState(140)
+    expect(atScale4.deepIsolation).toBeLessThan(0.1)
+    expect(atScale140.deepIsolation).toBeGreaterThan(0.8)
+    expect(atScale140.weights.micro).toBeGreaterThan(0.5)
+  })
+  it('keeps lod weights normalized to a single active blend', () => {
+    for (const scale of [0.05, 0.2, 1.1, 3, 8, 24, 110]) {
+      const weightValues = Object.values(getTopologyLodState(scale).weights)
+      // Sum the weights and count those above 0.001 in one pass.
+      let total = 0
+      let activeBands = 0
+      for (const weight of weightValues) {
+        total += weight
+        if (weight > 0.001) {
+          activeBands += 1
+        }
+      }
+      expect(total).toBeCloseTo(1, 4)
+      expect(activeBands).toBeLessThanOrEqual(2)
+    }
+  })
+})

tests/topologySceneModel.test.ts ADDED Viewed

	@@ -0,0 +1,90 @@

+import { describe, expect, it } from 'vitest'
+import { buildTopologySceneModel, describeTarget } from '../src/lib/topologyScene'
+import { analyzeCluster } from '../src/lib/trainingClusterModel'
+import { buildWorkbenchViewModel } from '../src/lib/workbenchPresenter'
+import { getScenarioWorkbenchConfig } from '../src/lib/workbench'
+type ScenarioId = Parameters<typeof getScenarioWorkbenchConfig>[0]
+type SceneModel = ReturnType<typeof buildTopologySceneModel>
+// Runs the config -> analysis -> view model -> scene pipeline for one scenario preset.
+function buildSceneForScenario(scenarioId: ScenarioId) {
+  const config = getScenarioWorkbenchConfig(scenarioId)
+  const analysis = analyzeCluster(config.model, config.training, config.cluster, config.parallelism)
+  const viewModel = buildWorkbenchViewModel(config, analysis)
+  const scene = buildTopologySceneModel(viewModel)
+  return { config, analysis, viewModel, scene }
+}
+// Row, column, and bus links of a scene as one list, in that order.
+function collectLinks(scene: SceneModel) {
+  return [...scene.rowLinks, ...scene.columnLinks, ...scene.busLinks]
+}
+describe('topology scene model', () => {
+  it('groups nodes into racks using cluster metadata', () => {
+    const { config, scene } = buildSceneForScenario('trinity-pretraining')
+    const expectedGpuCount = config.cluster.numNodes * config.cluster.gpusPerNode
+    expect(scene.pods.length).toBeGreaterThan(1)
+    expect(scene.nodes).toHaveLength(config.cluster.numNodes)
+    expect(scene.objectCounts.gpus).toBe(expectedGpuCount)
+    expect(scene.lodPolicy.maxScale).toBeGreaterThan(100)
+  })
+  it('describes GPUs with analysis-backed shard indices, including EP and FSDP', () => {
+    const { viewModel, scene } = buildSceneForScenario('trinity-pretraining')
+    const allocatedGpu = scene.nodes
+      .flatMap((node) => node.gpus)
+      .find((item) => item.memoryUsedGB > 0)
+    if (!allocatedGpu) {
+      throw new Error('expected at least one allocated gpu')
+    }
+    const details = describeTarget(scene, viewModel, { kind: 'gpu', id: allocatedGpu.id })
+    expect(details?.metrics.some((metric) => metric.label === 'Expert lane')).toBe(true)
+    expect(details?.metrics.some((metric) => metric.label === 'FSDP rank')).toBe(true)
+  })
+  it('keeps the scene renderable for infeasible configurations', () => {
+    const { analysis, viewModel, scene } = buildSceneForScenario('infeasible-memory')
+    expect(analysis.feasible).toBe(false)
+    expect(scene.nodes.length).toBeGreaterThan(0)
+    expect(viewModel.warnings[0]).toContain('exceeding')
+  })
+  it('exposes EP traffic links in the Trinity preset', () => {
+    const { scene } = buildSceneForScenario('trinity-pretraining')
+    const hasEpLink = collectLinks(scene).some((link) => link.trafficType === 'ep')
+    expect(hasEpLink).toBe(true)
+  })
+  it('exposes CP traffic links in the OLMo long-context preset', () => {
+    const { scene } = buildSceneForScenario('olmo-long-context')
+    const hasCpLink = collectLinks(scene).some((link) => link.trafficType === 'cp')
+    expect(hasCpLink).toBe(true)
+  })
+  it('collapses cross-rack links to rack centers instead of drawing node-to-node lines across racks', () => {
+    const { scene } = buildSceneForScenario('llama-pretraining')
+    const rackLink = [...scene.rowLinks, ...scene.columnLinks].find(
+      (link) => link.scope === 'rack' && link.transport === 'infiniband',
+    )
+    // True when some pod's centre coincides exactly with the given point.
+    const isPodCenter = (x: number | undefined, y: number | undefined) =>
+      scene.pods.some((pod) => pod.centerX === x && pod.centerY === y)
+    expect(rackLink).toBeDefined()
+    expect(isPodCenter(rackLink?.x1, rackLink?.y1)).toBe(true)
+    expect(isPodCenter(rackLink?.x2, rackLink?.y2)).toBe(true)
+  })
+  it('keeps stable focus and lod frames for every gpu', () => {
+    const { scene } = buildSceneForScenario('llama-pretraining')
+    const gpus = scene.nodes.flatMap((node) => node.gpus)
+    const widthsHold = gpus.every(
+      (gpu) => gpu.focusFrame.width >= gpu.width && gpu.lodFrame.width === gpu.width,
+    )
+    const heightsHold = gpus.every(
+      (gpu) => gpu.focusFrame.height >= gpu.height && gpu.lodFrame.height === gpu.height,
+    )
+    expect(gpus.length).toBe(scene.objectCounts.gpus)
+    expect(widthsHold).toBe(true)
+    expect(heightsHold).toBe(true)
+  })
+})

tests/trainingClusterModel.test.ts ADDED Viewed

	@@ -0,0 +1,269 @@

+import { describe, expect, it } from 'vitest'
+import {
+  a100_80gb,
+  analyzeCluster,
+  b300,
+  cluster64GPU,
+  h100_sxm,
+  llama70B,
+  llama7B,
+  llama31_405B,
+  olmo3_32B,
+  singleNode8GPU,
+  trinityLarge400B,
+  type ClusterConfig,
+  type TrainingConfig,
+} from '../src/lib/trainingClusterModel'
+// Shared training fixture; individual tests spread it and override seqLength or gradAccumSteps.
+const baselineTraining: TrainingConfig = {
+  microBatchSize: 1,
+  seqLength: 2048,
+  gradAccumSteps: 8,
+  precision: 'bf16',
+  activationCheckpointing: true,
+  optimizer: 'adamw',
+}
+// Unit tests for analyzeCluster: feasibility, derived data parallelism, memory and
+// communication volumes across several model / cluster / parallelism combinations.
+describe('trainingClusterModel', () => {
+  // Single-node TP=8 case that is expected to fit.
+  it('fits Llama 2 7B on 8x A100 80GB with TP=8 and derived DP=1', () => {
+    const analysis = analyzeCluster(llama7B(), baselineTraining, singleNode8GPU(a100_80gb()), {
+      tp: 8,
+      pp: 1,
+      cp: 1,
+      ep: 1,
+      distributedOptimizer: false,
+      fsdpShardGroupSize: 0,
+      zeroStage: 0,
+    })
+    expect(analysis.feasible).toBe(true)
+    expect(analysis.derivedParallelism.dp).toBe(1)
+    expect(analysis.memoryBreakdown.totalGB).toBeLessThan(80)
+  })
+  // Same cluster and parallelism as above with the larger model: expected to overflow HBM.
+  it('marks Llama 2 70B on 8x A100 80GB as infeasible for unsharded Adam training', () => {
+    const analysis = analyzeCluster(llama70B(), baselineTraining, singleNode8GPU(a100_80gb()), {
+      tp: 8,
+      pp: 1,
+      cp: 1,
+      ep: 1,
+      distributedOptimizer: false,
+      fsdpShardGroupSize: 0,
+      zeroStage: 0,
+    })
+    expect(analysis.feasible).toBe(false)
+    expect(analysis.infeasibilityReason).toContain('exceeding 80 GB of HBM')
+  })
+  // MFU is bounded on both sides: above 0.3 and at most 0.62.
+  it('keeps MFU in a realistic range for a balanced 64x H100 dense run', () => {
+    const analysis = analyzeCluster(
+      llama70B(),
+      {
+        ...baselineTraining,
+        seqLength: 4096,
+        gradAccumSteps: 16,
+      },
+      cluster64GPU(h100_sxm()),
+      {
+        tp: 4,
+        pp: 4,
+        cp: 1,
+        ep: 1,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 0,
+        zeroStage: 1,
+      },
+    )
+    expect(analysis.feasible).toBe(true)
+    expect(analysis.derivedParallelism.dp).toBe(4)
+    expect(analysis.throughput.mfu).toBeGreaterThan(0.3)
+    expect(analysis.throughput.mfu).toBeLessThanOrEqual(0.62)
+  })
+  // The two runs differ only in cp (1 vs 4).
+  it('reduces activation memory when CP increases and adds CP communication', () => {
+    const withoutCp = analyzeCluster(
+      llama70B(),
+      {
+        ...baselineTraining,
+        seqLength: 4096,
+      },
+      cluster64GPU(h100_sxm()),
+      {
+        tp: 2,
+        pp: 2,
+        cp: 1,
+        ep: 1,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 0,
+        zeroStage: 1,
+      },
+    )
+    const withCp = analyzeCluster(
+      llama70B(),
+      {
+        ...baselineTraining,
+        seqLength: 4096,
+      },
+      cluster64GPU(h100_sxm()),
+      {
+        tp: 2,
+        pp: 2,
+        cp: 4,
+        ep: 1,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 0,
+        zeroStage: 1,
+      },
+    )
+    expect(withCp.memoryBreakdown.activationsGB).toBeLessThan(withoutCp.memoryBreakdown.activationsGB)
+    expect(withCp.communication.cp.totalVolumePerStepGB).toBeGreaterThan(0)
+  })
+  // Same model, training config and cluster; the runs differ in the optimizer/FSDP/ZeRO settings.
+  it('reduces OLMo memory with HSDP shard groups compared with plain DP', () => {
+    // cluster64GPU preset with the node count and rack size overridden.
+    const cluster = {
+      ...cluster64GPU(h100_sxm()),
+      numNodes: 128,
+      nodesPerRack: 16,
+    }
+    const plain = analyzeCluster(
+      olmo3_32B(),
+      {
+        microBatchSize: 1,
+        seqLength: 8192,
+        gradAccumSteps: 1,
+        precision: 'bf16',
+        activationCheckpointing: true,
+        optimizer: 'adamw',
+      },
+      cluster,
+      {
+        tp: 1,
+        pp: 1,
+        cp: 1,
+        ep: 1,
+        distributedOptimizer: false,
+        fsdpShardGroupSize: 0,
+        zeroStage: 0,
+      },
+    )
+    const hsdp = analyzeCluster(
+      olmo3_32B(),
+      {
+        microBatchSize: 1,
+        seqLength: 8192,
+        gradAccumSteps: 1,
+        precision: 'bf16',
+        activationCheckpointing: true,
+        optimizer: 'adamw',
+      },
+      cluster,
+      {
+        tp: 1,
+        pp: 1,
+        cp: 1,
+        ep: 1,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 256,
+        zeroStage: 3,
+      },
+    )
+    expect(hsdp.derivedParallelism.replicaGroups).toBe(4)
+    expect(hsdp.memoryBreakdown.totalGB).toBeLessThan(plain.memoryBreakdown.totalGB)
+    expect(hsdp.communication.fsdp.totalVolumePerStepGB).toBeGreaterThan(0)
+  })
+  // Checks total vs active parameter counts, EP traffic, and that all eight EP lanes appear.
+  it('models Trinity as total-parameter-heavy but active-compute-light', () => {
+    const analysis = analyzeCluster(
+      trinityLarge400B(),
+      {
+        microBatchSize: 1,
+        seqLength: 8192,
+        gradAccumSteps: 8,
+        precision: 'bf16',
+        activationCheckpointing: true,
+        optimizer: 'muon',
+      },
+      trinityCluster(),
+      {
+        tp: 1,
+        pp: 1,
+        cp: 1,
+        ep: 8,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 128,
+        zeroStage: 3,
+      },
+    )
+    expect(analysis.totalParams).toBeGreaterThan(300_000_000_000)
+    expect(analysis.activeParamsPerToken).toBe(13_000_000_000)
+    expect(analysis.communication.ep.totalVolumePerStepGB).toBeGreaterThan(0)
+    expect(analysis.communication.ep.usesInterNode).toBe(false)
+    expect(new Set(analysis.gpuMap.map((gpu) => gpu.epLane))).toEqual(
+      new Set([0, 1, 2, 3, 4, 5, 6, 7]),
+    )
+  })
+  // DP is not configured directly; the test expects 128 to be derived for this layout.
+  it('derives DP for Llama 3.1 405B from world size and 4D parallelism', () => {
+    const analysis = analyzeCluster(
+      llama31_405B(),
+      {
+        microBatchSize: 1,
+        seqLength: 8192,
+        gradAccumSteps: 16,
+        precision: 'bf16',
+        activationCheckpointing: true,
+        optimizer: 'adamw',
+      },
+      llama405Cluster(),
+      {
+        tp: 8,
+        pp: 16,
+        cp: 1,
+        ep: 1,
+        distributedOptimizer: true,
+        fsdpShardGroupSize: 0,
+        zeroStage: 1,
+      },
+    )
+    expect(analysis.derivedParallelism.dp).toBe(128)
+    expect(analysis.feasible).toBe(true)
+    expect(analysis.communication.tp.totalVolumePerStepGB).toBeGreaterThan(0)
+    expect(analysis.communication.pp.totalVolumePerStepGB).toBeGreaterThan(0)
+    expect(analysis.communication.fsdp.totalVolumePerStepGB).toBe(0)
+  })
+})
+// Cluster fixture: 2048 nodes of 8 h100_sxm() GPUs, 16 nodes per rack.
+function llama405Cluster(): ClusterConfig {
+  const gpuType = h100_sxm()
+  const fixture: ClusterConfig = {
+    gpuType,
+    gpusPerNode: 8,
+    numNodes: 2048,
+    intraNodeBandwidthGBs: 900,
+    interNodeBandwidthGBs: 50,
+    nodesPerRack: 16,
+    rackLabel: 'rack',
+    nodeLabel: 'GPU host',
+    podLabel: 'rack',
+  }
+  return fixture
+}
+// Cluster fixture: 256 nodes of 8 b300() GPUs, 9 nodes per rack.
+function trinityCluster(): ClusterConfig {
+  const gpuType = b300()
+  const fixture: ClusterConfig = {
+    gpuType,
+    gpusPerNode: 8,
+    numNodes: 256,
+    intraNodeBandwidthGBs: 900,
+    interNodeBandwidthGBs: 50,
+    nodesPerRack: 9,
+    rackLabel: 'rack',
+    nodeLabel: 'GPU host',
+    podLabel: 'rack',
+  }
+  return fixture
+}

tsconfig.app.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
+    "target": "ES2022",
+    "useDefineForClassFields": true,
+    "lib": ["ES2022", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "types": ["vite/client"],
+    "skipLibCheck": true,
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "erasableSyntaxOnly": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["src"]
+}

tsconfig.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "files": [],
+  "references": [
+    { "path": "./tsconfig.app.json" },
+    { "path": "./tsconfig.node.json" }
+  ]
+}

tsconfig.node.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
+    "target": "ES2023",
+    "lib": ["ES2023"],
+    "module": "ESNext",
+    "types": ["node"],
+    "skipLibCheck": true,
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "erasableSyntaxOnly": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["vite.config.ts"]
+}

vite.config.ts ADDED Viewed

	@@ -0,0 +1,14 @@

+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+// The dev server and the preview server use the same binding: all interfaces, port 7860.
+const listenOptions = { host: '0.0.0.0', port: 7860 }
+export default defineConfig({
+  plugins: [react()],
+  server: { ...listenOptions },
+  preview: { ...listenOptions },
+})

vitest.config.ts ADDED Viewed

	@@ -0,0 +1,7 @@

+import { defineConfig } from 'vitest/config'
+// Only files matching tests/**/*.test.ts are collected; a *.spec.ts file does not match.
+const unitTestGlobs = ['tests/**/*.test.ts']
+export default defineConfig({
+  test: {
+    include: unitTestGlobs,
+  },
+})