Spaces:
Sleeping
Sleeping
Publish WIP HF Space snapshot
Browse files- .dockerignore +4 -0
- .gitignore +27 -0
- Dockerfile +22 -0
- README.md +53 -6
- compose.yaml +12 -0
- eslint.config.js +23 -0
- index.html +13 -0
- package-lock.json +0 -0
- package.json +40 -0
- playwright.config.ts +36 -0
- src/App.css +641 -0
- src/App.tsx +205 -0
- src/components/ClusterMap.tsx +2086 -0
- src/components/ControlsPanel.tsx +688 -0
- src/components/pixi/PixiSurface.tsx +50 -0
- src/hooks/useElementSize.ts +48 -0
- src/index.css +56 -0
- src/lib/linkedFocus.ts +35 -0
- src/lib/topologyLod.ts +216 -0
- src/lib/topologyScene.ts +980 -0
- src/lib/trainingClusterModel.ts +1882 -0
- src/lib/viewOptions.ts +50 -0
- src/lib/workbench.ts +395 -0
- src/lib/workbenchPresenter.ts +220 -0
- src/main.tsx +15 -0
- src/types/global.d.ts +49 -0
- tests/topology.spec.ts +234 -0
- tests/topologyLod.test.ts +49 -0
- tests/topologySceneModel.test.ts +90 -0
- tests/trainingClusterModel.test.ts +269 -0
- tsconfig.app.json +28 -0
- tsconfig.json +7 -0
- tsconfig.node.json +26 -0
- vite.config.ts +14 -0
- vitest.config.ts +7 -0
.dockerignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
node_modules
|
| 2 |
+
dist
|
| 3 |
+
.git
|
| 4 |
+
npm-debug.log
|
.gitignore
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Logs
|
| 2 |
+
logs
|
| 3 |
+
*.log
|
| 4 |
+
npm-debug.log*
|
| 5 |
+
yarn-debug.log*
|
| 6 |
+
yarn-error.log*
|
| 7 |
+
pnpm-debug.log*
|
| 8 |
+
lerna-debug.log*
|
| 9 |
+
|
| 10 |
+
node_modules
|
| 11 |
+
dist
|
| 12 |
+
dist-ssr
|
| 13 |
+
*.local
|
| 14 |
+
test-results
|
| 15 |
+
playwright-report
|
| 16 |
+
tests/topology.spec.ts-snapshots
|
| 17 |
+
|
| 18 |
+
# Editor directories and files
|
| 19 |
+
.vscode/*
|
| 20 |
+
!.vscode/extensions.json
|
| 21 |
+
.idea
|
| 22 |
+
.DS_Store
|
| 23 |
+
*.suo
|
| 24 |
+
*.ntvs*
|
| 25 |
+
*.njsproj
|
| 26 |
+
*.sln
|
| 27 |
+
*.sw?
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM node:22-alpine AS base
|
| 2 |
+
WORKDIR /app
|
| 3 |
+
COPY package*.json ./
|
| 4 |
+
RUN npm ci
|
| 5 |
+
|
| 6 |
+
FROM base AS dev
|
| 7 |
+
COPY . .
|
| 8 |
+
EXPOSE 7860
|
| 9 |
+
CMD ["npm", "run", "dev"]
|
| 10 |
+
|
| 11 |
+
FROM base AS build
|
| 12 |
+
COPY . .
|
| 13 |
+
RUN npm run build
|
| 14 |
+
|
| 15 |
+
FROM node:22-alpine AS production
|
| 16 |
+
RUN npm install -g serve@14.2.4
|
| 17 |
+
USER node
|
| 18 |
+
ENV HOME=/home/node
|
| 19 |
+
WORKDIR /home/node/app
|
| 20 |
+
COPY --from=build --chown=node:node /app/dist ./dist
|
| 21 |
+
EXPOSE 7860
|
| 22 |
+
CMD ["serve", "-s", "dist", "-l", "7860"]
|
README.md
CHANGED
|
@@ -1,12 +1,59 @@
|
|
| 1 |
---
|
| 2 |
-
title: Illustrated Cluster
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
colorTo: indigo
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
short_description:
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: "[WIP] Illustrated Training Cluster"
|
| 3 |
+
colorFrom: yellow
|
| 4 |
+
colorTo: green
|
|
|
|
| 5 |
sdk: docker
|
| 6 |
+
app_port: 7860
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
+
short_description: "[WIP] Interactive visualization of an LLM training cluster"
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# [WIP] Illustrated Training Cluster
|
| 13 |
+
|
| 14 |
+
Interactive workbench for exploring how large-model training layouts map onto GPU clusters.
|
| 15 |
+
|
| 16 |
+
Current WIP scope:
|
| 17 |
+
|
| 18 |
+
- compute-backed memory, communication, and throughput estimates
|
| 19 |
+
- linked cluster and transformer visualizations
|
| 20 |
+
- editable model, cluster, training, and parallelism controls
|
| 21 |
+
- built-in OLMo 3 32B and Trinity Large 400B starting points
|
| 22 |
+
|
| 23 |
+
Temporary note:
|
| 24 |
+
|
| 25 |
+
- the Llama 3.1 405B example is hidden from the UI while its training recipe is being reworked
|
| 26 |
+
|
| 27 |
+
## Stack
|
| 28 |
+
|
| 29 |
+
- React 19 + TypeScript
|
| 30 |
+
- Vite
|
| 31 |
+
- PixiJS + `@pixi/react`
|
| 32 |
+
- Docker for local runs and Hugging Face Spaces deployment
|
| 33 |
+
|
| 34 |
+
## Local development
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
docker compose up --build
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
Then open [http://localhost:7860](http://localhost:7860).
|
| 41 |
+
|
| 42 |
+
## Checks
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
npm run test:unit
|
| 46 |
+
npm run lint
|
| 47 |
+
npm run build
|
| 48 |
+
npm run test:e2e
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## Debugging and snapshots
|
| 52 |
+
|
| 53 |
+
- `?debug=1` enables the in-app debug overlay
|
| 54 |
+
- `?snapshot=1` freezes animation for deterministic screenshots
|
| 55 |
+
- `?scenario=default|olmo-pretraining|olmo-long-context|llama-pretraining|llama-long-context|trinity-pretraining|trinity-long-context|infeasible-memory`
|
| 56 |
+
|
| 57 |
+
## Hugging Face Spaces
|
| 58 |
+
|
| 59 |
+
This repository is configured as a Docker Space. Hugging Face builds the root `Dockerfile` and serves the app on port `7860`.
|
compose.yaml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
services:
|
| 2 |
+
app:
|
| 3 |
+
build:
|
| 4 |
+
context: .
|
| 5 |
+
target: dev
|
| 6 |
+
ports:
|
| 7 |
+
- '7860:7860'
|
| 8 |
+
environment:
|
| 9 |
+
CHOKIDAR_USEPOLLING: 'true'
|
| 10 |
+
volumes:
|
| 11 |
+
- .:/app
|
| 12 |
+
- /app/node_modules
|
eslint.config.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import js from '@eslint/js'
|
| 2 |
+
import globals from 'globals'
|
| 3 |
+
import reactHooks from 'eslint-plugin-react-hooks'
|
| 4 |
+
import reactRefresh from 'eslint-plugin-react-refresh'
|
| 5 |
+
import tseslint from 'typescript-eslint'
|
| 6 |
+
import { defineConfig, globalIgnores } from 'eslint/config'
|
| 7 |
+
|
| 8 |
+
export default defineConfig([
|
| 9 |
+
globalIgnores(['dist']),
|
| 10 |
+
{
|
| 11 |
+
files: ['**/*.{ts,tsx}'],
|
| 12 |
+
extends: [
|
| 13 |
+
js.configs.recommended,
|
| 14 |
+
tseslint.configs.recommended,
|
| 15 |
+
reactHooks.configs.flat.recommended,
|
| 16 |
+
reactRefresh.configs.vite,
|
| 17 |
+
],
|
| 18 |
+
languageOptions: {
|
| 19 |
+
ecmaVersion: 2020,
|
| 20 |
+
globals: globals.browser,
|
| 21 |
+
},
|
| 22 |
+
},
|
| 23 |
+
])
|
index.html
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 7 |
+
<title>[WIP] Illustrated Training Cluster</title>
|
| 8 |
+
</head>
|
| 9 |
+
<body>
|
| 10 |
+
<div id="root"></div>
|
| 11 |
+
<script type="module" src="/src/main.tsx"></script>
|
| 12 |
+
</body>
|
| 13 |
+
</html>
|
package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "cluster-topology-viz",
|
| 3 |
+
"private": true,
|
| 4 |
+
"version": "0.0.0",
|
| 5 |
+
"type": "module",
|
| 6 |
+
"scripts": {
|
| 7 |
+
"dev": "vite",
|
| 8 |
+
"build": "tsc -b && vite build",
|
| 9 |
+
"lint": "eslint .",
|
| 10 |
+
"preview": "vite preview",
|
| 11 |
+
"check": "npm run lint && npm run build",
|
| 12 |
+
"test:unit": "vitest run",
|
| 13 |
+
"test:e2e": "npm run build && playwright test",
|
| 14 |
+
"test:e2e:update": "npm run build && playwright test --update-snapshots"
|
| 15 |
+
},
|
| 16 |
+
"dependencies": {
|
| 17 |
+
"@fontsource/ibm-plex-mono": "^5.2.7",
|
| 18 |
+
"@fontsource/space-grotesk": "^5.2.10",
|
| 19 |
+
"@pixi/react": "^8.0.5",
|
| 20 |
+
"pixi.js": "^8.16.0",
|
| 21 |
+
"react": "^19.2.0",
|
| 22 |
+
"react-dom": "^19.2.0"
|
| 23 |
+
},
|
| 24 |
+
"devDependencies": {
|
| 25 |
+
"@eslint/js": "^9.39.1",
|
| 26 |
+
"@playwright/test": "^1.58.2",
|
| 27 |
+
"@types/node": "^24.10.1",
|
| 28 |
+
"@types/react": "^19.2.7",
|
| 29 |
+
"@types/react-dom": "^19.2.3",
|
| 30 |
+
"@vitejs/plugin-react": "^5.1.1",
|
| 31 |
+
"eslint": "^9.39.1",
|
| 32 |
+
"eslint-plugin-react-hooks": "^7.0.1",
|
| 33 |
+
"eslint-plugin-react-refresh": "^0.4.24",
|
| 34 |
+
"globals": "^16.5.0",
|
| 35 |
+
"typescript": "~5.9.3",
|
| 36 |
+
"typescript-eslint": "^8.48.0",
|
| 37 |
+
"vite": "^7.3.1",
|
| 38 |
+
"vitest": "^4.0.18"
|
| 39 |
+
}
|
| 40 |
+
}
|
playwright.config.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { defineConfig, devices } from '@playwright/test'
|
| 2 |
+
|
| 3 |
+
export default defineConfig({
|
| 4 |
+
testDir: './tests',
|
| 5 |
+
testMatch: /.*\.spec\.ts/,
|
| 6 |
+
fullyParallel: false,
|
| 7 |
+
retries: 0,
|
| 8 |
+
reporter: 'list',
|
| 9 |
+
workers: 1,
|
| 10 |
+
use: {
|
| 11 |
+
baseURL: 'http://127.0.0.1:4173',
|
| 12 |
+
trace: 'on-first-retry',
|
| 13 |
+
viewport: {
|
| 14 |
+
width: 1600,
|
| 15 |
+
height: 1100,
|
| 16 |
+
},
|
| 17 |
+
},
|
| 18 |
+
projects: [
|
| 19 |
+
{
|
| 20 |
+
name: 'chromium',
|
| 21 |
+
use: {
|
| 22 |
+
...devices['Desktop Chrome'],
|
| 23 |
+
viewport: {
|
| 24 |
+
width: 1600,
|
| 25 |
+
height: 1100,
|
| 26 |
+
},
|
| 27 |
+
},
|
| 28 |
+
},
|
| 29 |
+
],
|
| 30 |
+
webServer: {
|
| 31 |
+
command: 'npm run preview -- --host 127.0.0.1 --port 4173',
|
| 32 |
+
port: 4173,
|
| 33 |
+
reuseExistingServer: true,
|
| 34 |
+
timeout: 120000,
|
| 35 |
+
},
|
| 36 |
+
})
|
src/App.css
ADDED
|
@@ -0,0 +1,641 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.workbench-shell {
|
| 2 |
+
max-width: 1680px;
|
| 3 |
+
margin: 0 auto;
|
| 4 |
+
padding: 18px;
|
| 5 |
+
}
|
| 6 |
+
|
| 7 |
+
.mini-label {
|
| 8 |
+
margin: 0 0 6px;
|
| 9 |
+
color: var(--accent-cool);
|
| 10 |
+
font-family: var(--font-mono);
|
| 11 |
+
font-size: 0.72rem;
|
| 12 |
+
letter-spacing: 0.13em;
|
| 13 |
+
text-transform: uppercase;
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
.app-topbar {
|
| 17 |
+
display: grid;
|
| 18 |
+
gap: 14px;
|
| 19 |
+
margin-bottom: 14px;
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
.title-block {
|
| 23 |
+
display: grid;
|
| 24 |
+
gap: 4px;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
.title-block h1 {
|
| 28 |
+
margin: 0;
|
| 29 |
+
color: var(--ink-strong);
|
| 30 |
+
font-size: clamp(1.7rem, 2vw, 2.2rem);
|
| 31 |
+
line-height: 1;
|
| 32 |
+
letter-spacing: -0.04em;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.title-copy {
|
| 36 |
+
margin: 0;
|
| 37 |
+
max-width: 88ch;
|
| 38 |
+
color: var(--ink-soft);
|
| 39 |
+
font-size: 0.98rem;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.summary-strip {
|
| 43 |
+
display: grid;
|
| 44 |
+
grid-template-columns: minmax(260px, 1.8fr) repeat(4, minmax(0, 1fr));
|
| 45 |
+
gap: 10px;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.summary-card,
|
| 49 |
+
.controls-band,
|
| 50 |
+
.map-panel,
|
| 51 |
+
.side-card {
|
| 52 |
+
border: 1px solid var(--panel-stroke);
|
| 53 |
+
background: rgba(253, 252, 248, 0.92);
|
| 54 |
+
box-shadow: 0 12px 28px rgba(19, 42, 51, 0.06);
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.summary-card {
|
| 58 |
+
min-height: 88px;
|
| 59 |
+
padding: 12px 14px;
|
| 60 |
+
border-radius: 16px;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
.summary-card span,
|
| 64 |
+
.fact-row span,
|
| 65 |
+
.inspector-grid dt {
|
| 66 |
+
display: block;
|
| 67 |
+
color: var(--ink-muted);
|
| 68 |
+
font-size: 0.76rem;
|
| 69 |
+
text-transform: uppercase;
|
| 70 |
+
letter-spacing: 0.08em;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
.summary-card strong,
|
| 74 |
+
.fact-row strong,
|
| 75 |
+
.inspector-grid dd {
|
| 76 |
+
display: block;
|
| 77 |
+
margin-top: 6px;
|
| 78 |
+
color: var(--ink-strong);
|
| 79 |
+
font-size: 1.15rem;
|
| 80 |
+
line-height: 1.05;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.summary-card p {
|
| 84 |
+
margin: 8px 0 0;
|
| 85 |
+
color: var(--ink-soft);
|
| 86 |
+
font-size: 0.9rem;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.summary-card-wide strong {
|
| 90 |
+
font-size: 1.25rem;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.controls-band {
|
| 94 |
+
padding: 12px 14px 14px;
|
| 95 |
+
border-radius: 16px;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.controls-head {
|
| 99 |
+
display: flex;
|
| 100 |
+
justify-content: space-between;
|
| 101 |
+
gap: 12px;
|
| 102 |
+
align-items: flex-end;
|
| 103 |
+
margin-bottom: 12px;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.controls-head h2,
|
| 107 |
+
.topology-header h2,
|
| 108 |
+
.side-header h3 {
|
| 109 |
+
margin: 0;
|
| 110 |
+
color: var(--ink-strong);
|
| 111 |
+
font-size: 1.1rem;
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.controls-meta {
|
| 115 |
+
display: flex;
|
| 116 |
+
flex-wrap: wrap;
|
| 117 |
+
gap: 8px;
|
| 118 |
+
align-items: center;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.controls-meta span,
|
| 122 |
+
.reset-chip,
|
| 123 |
+
.scene-button {
|
| 124 |
+
padding: 7px 10px;
|
| 125 |
+
border-radius: 999px;
|
| 126 |
+
border: 1px solid rgba(19, 58, 80, 0.09);
|
| 127 |
+
background: rgba(246, 244, 238, 0.92);
|
| 128 |
+
color: var(--ink-soft);
|
| 129 |
+
font-family: var(--font-mono);
|
| 130 |
+
font-size: 0.78rem;
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
.reset-chip,
|
| 134 |
+
.scene-button {
|
| 135 |
+
color: var(--accent-warm);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.controls-grid {
|
| 139 |
+
display: grid;
|
| 140 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 141 |
+
gap: 10px;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.controls-stack {
|
| 145 |
+
display: grid;
|
| 146 |
+
gap: 10px;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
.controls-grid-parallelism {
|
| 150 |
+
grid-template-columns: repeat(5, minmax(0, 1fr));
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.control-card {
|
| 154 |
+
border: 1px solid rgba(19, 58, 80, 0.08);
|
| 155 |
+
border-radius: 14px;
|
| 156 |
+
padding: 11px 12px;
|
| 157 |
+
background: rgba(250, 248, 242, 0.96);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.field-grid {
|
| 161 |
+
display: grid;
|
| 162 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 163 |
+
gap: 10px;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
.field-grid-wide {
|
| 167 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.control-card-header {
|
| 171 |
+
display: flex;
|
| 172 |
+
justify-content: space-between;
|
| 173 |
+
gap: 10px;
|
| 174 |
+
align-items: center;
|
| 175 |
+
margin-bottom: 10px;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.control-card-header h3 {
|
| 179 |
+
margin: 0;
|
| 180 |
+
color: var(--ink-strong);
|
| 181 |
+
font-size: 1rem;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
.control-card-header p {
|
| 185 |
+
margin: 2px 0 0;
|
| 186 |
+
color: var(--ink-soft);
|
| 187 |
+
font-size: 0.84rem;
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
.control-badge {
|
| 191 |
+
padding: 5px 8px;
|
| 192 |
+
border-radius: 999px;
|
| 193 |
+
background: rgba(17, 122, 112, 0.1);
|
| 194 |
+
color: var(--accent-cool);
|
| 195 |
+
font-family: var(--font-mono);
|
| 196 |
+
font-size: 0.76rem;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.control-field {
|
| 200 |
+
display: grid;
|
| 201 |
+
gap: 6px;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.control-field span,
|
| 205 |
+
.control-toggle span {
|
| 206 |
+
color: var(--ink-muted);
|
| 207 |
+
font-size: 0.76rem;
|
| 208 |
+
letter-spacing: 0.06em;
|
| 209 |
+
text-transform: uppercase;
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
.control-field input,
|
| 213 |
+
.control-field select {
|
| 214 |
+
width: 100%;
|
| 215 |
+
padding: 8px 10px;
|
| 216 |
+
border: 1px solid rgba(19, 58, 80, 0.12);
|
| 217 |
+
border-radius: 10px;
|
| 218 |
+
background: #fffdf8;
|
| 219 |
+
color: var(--ink-strong);
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.control-field-toggle {
|
| 223 |
+
align-self: end;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
.control-toggle {
|
| 227 |
+
display: inline-flex;
|
| 228 |
+
align-items: center;
|
| 229 |
+
gap: 8px;
|
| 230 |
+
min-height: 40px;
|
| 231 |
+
padding: 8px 10px;
|
| 232 |
+
border: 1px solid rgba(19, 58, 80, 0.12);
|
| 233 |
+
border-radius: 10px;
|
| 234 |
+
background: #fffdf8;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.control-toggle input {
|
| 238 |
+
accent-color: var(--accent-cool);
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
.option-strip {
|
| 242 |
+
display: flex;
|
| 243 |
+
flex-wrap: wrap;
|
| 244 |
+
gap: 6px;
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
.option-chip {
|
| 248 |
+
min-width: 38px;
|
| 249 |
+
padding: 7px 9px;
|
| 250 |
+
border: 1px solid rgba(19, 58, 80, 0.12);
|
| 251 |
+
border-radius: 10px;
|
| 252 |
+
background: #fffdf8;
|
| 253 |
+
color: var(--ink-soft);
|
| 254 |
+
font-weight: 500;
|
| 255 |
+
transition:
|
| 256 |
+
background-color 150ms ease,
|
| 257 |
+
border-color 150ms ease,
|
| 258 |
+
transform 150ms ease;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
.option-chip:hover {
|
| 262 |
+
transform: translateY(-1px);
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
.option-chip.active {
|
| 266 |
+
border-color: rgba(17, 122, 112, 0.26);
|
| 267 |
+
background: rgba(225, 246, 241, 0.96);
|
| 268 |
+
color: var(--accent-cool);
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
.analysis-stack {
|
| 272 |
+
display: grid;
|
| 273 |
+
gap: 14px;
|
| 274 |
+
margin-top: 14px;
|
| 275 |
+
align-items: start;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
.status-banner {
|
| 279 |
+
display: flex;
|
| 280 |
+
gap: 10px;
|
| 281 |
+
align-items: center;
|
| 282 |
+
padding: 10px 12px;
|
| 283 |
+
border-radius: 14px;
|
| 284 |
+
border: 1px solid rgba(214, 98, 37, 0.16);
|
| 285 |
+
background: rgba(255, 245, 236, 0.96);
|
| 286 |
+
color: var(--ink-soft);
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
.status-banner strong {
|
| 290 |
+
color: var(--accent-warm);
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.map-panel {
|
| 294 |
+
padding: 12px;
|
| 295 |
+
border-radius: 16px;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.topology-header {
|
| 299 |
+
display: flex;
|
| 300 |
+
justify-content: space-between;
|
| 301 |
+
gap: 12px;
|
| 302 |
+
align-items: flex-end;
|
| 303 |
+
margin-bottom: 10px;
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
.topology-header-actions {
|
| 307 |
+
display: flex;
|
| 308 |
+
flex-wrap: wrap;
|
| 309 |
+
gap: 10px;
|
| 310 |
+
align-items: center;
|
| 311 |
+
justify-content: flex-end;
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
.topology-scene-shell {
|
| 315 |
+
display: grid;
|
| 316 |
+
gap: 10px;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.scene-toolbar {
|
| 320 |
+
display: flex;
|
| 321 |
+
justify-content: flex-end;
|
| 322 |
+
gap: 10px;
|
| 323 |
+
align-items: center;
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
.scene-toolbar-actions {
|
| 327 |
+
display: flex;
|
| 328 |
+
gap: 8px;
|
| 329 |
+
flex-wrap: wrap;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
.pixi-surface-wrap {
|
| 333 |
+
position: relative;
|
| 334 |
+
width: 100%;
|
| 335 |
+
overflow: hidden;
|
| 336 |
+
border-radius: 18px;
|
| 337 |
+
background:
|
| 338 |
+
radial-gradient(circle at 10% 10%, rgba(24, 155, 141, 0.14), transparent 22%),
|
| 339 |
+
radial-gradient(circle at 100% 0%, rgba(255, 175, 111, 0.16), transparent 24%),
|
| 340 |
+
linear-gradient(180deg, #0f202d 0%, #08141d 100%);
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
.topology-surface-wrap {
|
| 344 |
+
min-height: 760px;
|
| 345 |
+
height: min(76vh, 980px);
|
| 346 |
+
user-select: none;
|
| 347 |
+
touch-action: none;
|
| 348 |
+
overscroll-behavior: contain;
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
.topology-interaction-layer {
|
| 352 |
+
position: absolute;
|
| 353 |
+
inset: 0;
|
| 354 |
+
z-index: 1;
|
| 355 |
+
background: rgba(0, 0, 0, 0.001);
|
| 356 |
+
cursor: grab;
|
| 357 |
+
touch-action: none;
|
| 358 |
+
overscroll-behavior: contain;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
.topology-interaction-layer.is-dragging {
|
| 362 |
+
cursor: grabbing;
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
.pixi-surface,
|
| 366 |
+
.pixi-canvas,
|
| 367 |
+
.pixi-surface canvas {
|
| 368 |
+
display: block;
|
| 369 |
+
width: 100%;
|
| 370 |
+
height: 100%;
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.scene-inspector,
|
| 374 |
+
.scene-debug-panel {
|
| 375 |
+
position: absolute;
|
| 376 |
+
z-index: 2;
|
| 377 |
+
max-width: min(320px, calc(100% - 32px));
|
| 378 |
+
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 379 |
+
border-radius: 16px;
|
| 380 |
+
backdrop-filter: blur(14px);
|
| 381 |
+
pointer-events: auto;
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
.scene-inspector {
|
| 385 |
+
left: 16px;
|
| 386 |
+
bottom: 16px;
|
| 387 |
+
padding: 12px 14px;
|
| 388 |
+
background: rgba(7, 19, 29, 0.78);
|
| 389 |
+
color: rgba(229, 241, 246, 0.92);
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.scene-inspector .mini-label {
|
| 393 |
+
color: rgba(135, 244, 226, 0.82);
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
.scene-inspector h3 {
|
| 397 |
+
margin: 0;
|
| 398 |
+
font-size: 1rem;
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.inspector-subheading {
|
| 402 |
+
margin: 6px 0 0;
|
| 403 |
+
color: rgba(179, 201, 211, 0.82);
|
| 404 |
+
font-size: 0.88rem;
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
.inspector-link-note {
|
| 408 |
+
margin: 10px 0 0;
|
| 409 |
+
color: rgba(255, 223, 161, 0.9);
|
| 410 |
+
font-size: 0.82rem;
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
.inspector-grid {
|
| 414 |
+
display: grid;
|
| 415 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 416 |
+
gap: 10px 14px;
|
| 417 |
+
margin: 12px 0 0;
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
.inspector-grid div {
|
| 421 |
+
margin: 0;
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
.inspector-grid dt {
|
| 425 |
+
color: rgba(160, 188, 200, 0.78);
|
| 426 |
+
font-size: 0.68rem;
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
.inspector-grid dd {
|
| 430 |
+
margin: 4px 0 0;
|
| 431 |
+
color: rgba(243, 250, 252, 0.96);
|
| 432 |
+
font-size: 0.95rem;
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
.scene-debug-panel {
|
| 436 |
+
top: 16px;
|
| 437 |
+
right: 16px;
|
| 438 |
+
padding: 12px 14px;
|
| 439 |
+
background: rgba(10, 20, 31, 0.82);
|
| 440 |
+
color: rgba(225, 238, 244, 0.92);
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
.scene-debug-panel .mini-label {
|
| 444 |
+
color: rgba(255, 221, 156, 0.82);
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.debug-toggle-grid {
|
| 448 |
+
display: grid;
|
| 449 |
+
gap: 8px;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
.debug-toggle-grid label {
|
| 453 |
+
display: flex;
|
| 454 |
+
align-items: center;
|
| 455 |
+
gap: 8px;
|
| 456 |
+
color: rgba(228, 240, 245, 0.92);
|
| 457 |
+
font-size: 0.86rem;
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
.debug-toggle-grid input {
|
| 461 |
+
accent-color: var(--accent-warm);
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
.debug-stats {
|
| 465 |
+
display: flex;
|
| 466 |
+
flex-wrap: wrap;
|
| 467 |
+
gap: 8px;
|
| 468 |
+
margin-top: 12px;
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
.debug-stats span {
|
| 472 |
+
padding: 5px 8px;
|
| 473 |
+
border-radius: 999px;
|
| 474 |
+
background: rgba(255, 255, 255, 0.06);
|
| 475 |
+
color: rgba(235, 245, 248, 0.9);
|
| 476 |
+
font-family: var(--font-mono);
|
| 477 |
+
font-size: 0.74rem;
|
| 478 |
+
}
|
| 479 |
+
|
| 480 |
+
.side-column {
|
| 481 |
+
display: grid;
|
| 482 |
+
gap: 14px;
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
.side-card {
|
| 486 |
+
padding: 12px;
|
| 487 |
+
border-radius: 16px;
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
.side-header {
|
| 491 |
+
margin-bottom: 12px;
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
.facts-grid {
|
| 495 |
+
display: grid;
|
| 496 |
+
gap: 10px;
|
| 497 |
+
}
|
| 498 |
+
|
| 499 |
+
.fact-row {
|
| 500 |
+
padding-bottom: 10px;
|
| 501 |
+
border-bottom: 1px solid rgba(19, 58, 80, 0.08);
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
.fact-row:last-child {
|
| 505 |
+
padding-bottom: 0;
|
| 506 |
+
border-bottom: 0;
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
.warning-list {
|
| 510 |
+
display: grid;
|
| 511 |
+
gap: 8px;
|
| 512 |
+
margin-top: 12px;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.warning-pill {
|
| 516 |
+
border-left: 3px solid rgba(214, 98, 37, 0.74);
|
| 517 |
+
border-radius: 10px;
|
| 518 |
+
padding: 9px 10px;
|
| 519 |
+
background: rgba(255, 244, 232, 0.92);
|
| 520 |
+
color: var(--ink-soft);
|
| 521 |
+
font-size: 0.88rem;
|
| 522 |
+
}
|
| 523 |
+
|
| 524 |
+
.fullscreen-overlay {
|
| 525 |
+
position: fixed;
|
| 526 |
+
inset: 0;
|
| 527 |
+
z-index: 40;
|
| 528 |
+
display: grid;
|
| 529 |
+
place-items: center;
|
| 530 |
+
padding: 20px;
|
| 531 |
+
background: rgba(4, 12, 20, 0.72);
|
| 532 |
+
backdrop-filter: blur(10px);
|
| 533 |
+
}
|
| 534 |
+
|
| 535 |
+
.fullscreen-shell {
|
| 536 |
+
display: grid;
|
| 537 |
+
gap: 12px;
|
| 538 |
+
width: min(1600px, 100%);
|
| 539 |
+
max-height: calc(100vh - 40px);
|
| 540 |
+
padding: 14px;
|
| 541 |
+
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 542 |
+
border-radius: 22px;
|
| 543 |
+
background: rgba(252, 250, 245, 0.98);
|
| 544 |
+
box-shadow: 0 24px 80px rgba(4, 12, 20, 0.38);
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
.fullscreen-toolbar {
|
| 548 |
+
display: flex;
|
| 549 |
+
justify-content: space-between;
|
| 550 |
+
gap: 12px;
|
| 551 |
+
align-items: flex-end;
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
.fullscreen-toolbar h2 {
|
| 555 |
+
margin: 0;
|
| 556 |
+
color: var(--ink-strong);
|
| 557 |
+
font-size: 1.2rem;
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
.fullscreen-content {
|
| 561 |
+
min-height: 0;
|
| 562 |
+
overflow: auto;
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
.fullscreen-content .map-panel {
|
| 566 |
+
min-height: calc(100vh - 168px);
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
.fullscreen-content .topology-surface-wrap {
|
| 570 |
+
height: calc(100vh - 290px);
|
| 571 |
+
min-height: 680px;
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
@media (max-width: 1400px) {
|
| 575 |
+
.summary-strip {
|
| 576 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
.fullscreen-shell {
|
| 580 |
+
width: 100%;
|
| 581 |
+
}
|
| 582 |
+
}
|
| 583 |
+
|
| 584 |
+
@media (max-width: 1040px) {
|
| 585 |
+
.controls-grid,
|
| 586 |
+
.controls-grid-parallelism,
|
| 587 |
+
.field-grid,
|
| 588 |
+
.field-grid-wide {
|
| 589 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 590 |
+
}
|
| 591 |
+
|
| 592 |
+
.controls-head,
|
| 593 |
+
.topology-header,
|
| 594 |
+
.scene-toolbar,
|
| 595 |
+
.fullscreen-toolbar {
|
| 596 |
+
flex-direction: column;
|
| 597 |
+
align-items: flex-start;
|
| 598 |
+
}
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
@media (max-width: 760px) {
|
| 602 |
+
.workbench-shell {
|
| 603 |
+
padding: 12px;
|
| 604 |
+
}
|
| 605 |
+
|
| 606 |
+
.summary-strip,
|
| 607 |
+
.controls-grid,
|
| 608 |
+
.controls-grid-parallelism,
|
| 609 |
+
.field-grid,
|
| 610 |
+
.field-grid-wide,
|
| 611 |
+
.inspector-grid {
|
| 612 |
+
grid-template-columns: 1fr;
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
.topology-surface-wrap {
|
| 616 |
+
min-height: 560px;
|
| 617 |
+
height: 64vh;
|
| 618 |
+
}
|
| 619 |
+
|
| 620 |
+
.fullscreen-overlay {
|
| 621 |
+
padding: 10px;
|
| 622 |
+
}
|
| 623 |
+
|
| 624 |
+
.fullscreen-shell {
|
| 625 |
+
max-height: calc(100vh - 20px);
|
| 626 |
+
padding: 10px;
|
| 627 |
+
}
|
| 628 |
+
|
| 629 |
+
.fullscreen-content .topology-surface-wrap,
|
| 630 |
+
.fullscreen-content .topology-surface-wrap {
|
| 631 |
+
min-height: 420px;
|
| 632 |
+
height: 62vh;
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
.scene-inspector,
|
| 636 |
+
.scene-debug-panel {
|
| 637 |
+
position: static;
|
| 638 |
+
max-width: none;
|
| 639 |
+
margin: 10px;
|
| 640 |
+
}
|
| 641 |
+
}
|
src/App.tsx
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useEffect, useMemo, useState } from 'react'
|
| 2 |
+
import './App.css'
|
| 3 |
+
import { ClusterMap } from './components/ClusterMap'
|
| 4 |
+
import { ControlsPanel } from './components/ControlsPanel'
|
| 5 |
+
import { analyzeCluster } from './lib/trainingClusterModel'
|
| 6 |
+
import { getScenarioConfig, getViewOptions } from './lib/viewOptions'
|
| 7 |
+
import { buildWorkbenchViewModel } from './lib/workbenchPresenter'
|
| 8 |
+
import { type WorkbenchConfig } from './lib/workbench'
|
| 9 |
+
|
| 10 |
+
/**
 * Root component of the parallelism workbench.
 *
 * Holds the editable `WorkbenchConfig`, derives the cluster analysis and the
 * presentation view model from it, and renders the summary strip, the
 * controls panel, the cluster map and the run breakdown. The cluster map can
 * be shown inline or inside a modal fullscreen overlay.
 */
function App() {
  const viewOptions = getViewOptions()
  // Lazy initializer: the scenario config is only built for the first render.
  const [config, setConfig] = useState<WorkbenchConfig>(() =>
    getScenarioConfig(viewOptions.scenario),
  )
  // Which view (if any) is currently shown in the fullscreen overlay.
  const [expandedView, setExpandedView] = useState<'cluster' | null>(null)

  const analysis = useMemo(
    () => analyzeCluster(config.model, config.training, config.cluster, config.parallelism),
    [config],
  )
  const viewModel = useMemo(
    () => buildWorkbenchViewModel(config, analysis),
    [analysis, config],
  )

  // While the overlay is open: lock page scrolling and close on Escape.
  useEffect(() => {
    if (!expandedView) {
      return undefined
    }

    // Remember the inline overflow value so cleanup restores exactly it.
    const previousOverflow = document.body.style.overflow
    document.body.style.overflow = 'hidden'

    const handleKeyDown = (event: KeyboardEvent) => {
      if (event.key === 'Escape') {
        setExpandedView(null)
      }
    }

    window.addEventListener('keydown', handleKeyDown)

    return () => {
      document.body.style.overflow = previousOverflow
      window.removeEventListener('keydown', handleKeyDown)
    }
  }, [expandedView])

  const handleConfigChange = (nextConfig: WorkbenchConfig) => {
    setConfig(nextConfig)
  }

  // Resets to the configuration of the scenario selected by the view options.
  const handleReset = () => {
    setConfig(getScenarioConfig(viewOptions.scenario))
  }

  // Built once per render and placed either inline or inside the overlay,
  // never both (see the `expandedView` checks below).
  const clusterView = (
    <section className="map-panel">
      <div className="topology-header">
        <div>
          <p className="mini-label">Live cluster topology</p>
          <h2>GPU fabric map</h2>
        </div>

        <div className="topology-header-actions">
          <button
            type="button"
            className="scene-button"
            onClick={() => setExpandedView('cluster')}
          >
            open full screen
          </button>
        </div>
      </div>

      <ClusterMap
        viewModel={viewModel}
        debugEnabled={viewOptions.debug}
        snapshotMode={viewOptions.snapshot}
        linkedFocus={null}
      />
    </section>
  )

  return (
    <div className="workbench-shell">
      <header className="app-topbar">
        <div className="title-block">
          <p className="mini-label">Illustrated training cluster</p>
          <h1>[WIP] Parallelism workbench</h1>
          <p className="title-copy">{viewModel.subheadline}</p>
        </div>

        {!analysis.feasible ? (
          <div className="status-banner status-banner-danger" data-testid="infeasible-banner">
            <strong>Infeasible configuration</strong>
            <span>{analysis.infeasibilityReason}</span>
          </div>
        ) : null}

        <section className="summary-strip" aria-label="simulation summary">
          <div className="summary-card summary-card-wide">
            <span>Scenario</span>
            <strong>{viewModel.headline}</strong>
            <p>
              {config.cluster.numNodes} {config.cluster.nodeLabel ?? 'nodes'} · {config.cluster.gpuType.name}
              {' · '}
              {config.model.numLayers} layers · hidden {config.model.hiddenDim.toLocaleString()}
            </p>
          </div>
          <div className="summary-card">
            <span>Throughput</span>
            <strong>{viewModel.summary.throughputLabel}</strong>
            <p>{viewModel.summary.throughputNote}</p>
          </div>
          <div className="summary-card">
            <span>Active GPUs</span>
            <strong>{viewModel.summary.gpuLabel}</strong>
            <p>{viewModel.summary.gpuNote}</p>
          </div>
          <div className="summary-card">
            <span>Interconnect</span>
            <strong>{viewModel.summary.interconnectLabel}</strong>
            <p>{viewModel.summary.interconnectNote}</p>
          </div>
          <div className="summary-card">
            <span>Bottleneck</span>
            <strong>{viewModel.summary.bottleneckLabel}</strong>
            <p>{viewModel.summary.bottleneckNote}</p>
          </div>
        </section>
      </header>

      <ControlsPanel
        config={config}
        onChange={handleConfigChange}
        onReset={handleReset}
        viewModel={viewModel}
      />

      <main className="analysis-stack">
        {expandedView !== 'cluster' ? clusterView : null}

        <section className="side-card">
          <div className="side-header">
            <p className="mini-label">Run breakdown</p>
            <h3>{config.cluster.gpuType.name}</h3>
          </div>

          <div className="facts-grid">
            {viewModel.facts.map((fact) => (
              <div key={fact.label} className="fact-row">
                <span>{fact.label}</span>
                <strong>{fact.value}</strong>
              </div>
            ))}
          </div>

          <div className="warning-list" aria-live="polite">
            {viewModel.warnings.map((warning) => (
              <div key={warning} className="warning-pill">
                {warning}
              </div>
            ))}
          </div>
        </section>
      </main>

      {expandedView ? (
        <div
          className="fullscreen-overlay"
          role="dialog"
          aria-modal="true"
          // A dialog needs an accessible name; it is taken from the overlay heading.
          aria-labelledby="fullscreen-view-title"
          onClick={(event) => {
            // Only a click on the backdrop itself closes the overlay, not
            // clicks that bubble up from the content.
            if (event.target === event.currentTarget) {
              setExpandedView(null)
            }
          }}
        >
          <div className="fullscreen-shell">
            <div className="fullscreen-toolbar">
              <div>
                <p className="mini-label">Expanded view</p>
                <h2 id="fullscreen-view-title">GPU fabric map</h2>
              </div>

              <button
                type="button"
                className="scene-button"
                onClick={() => setExpandedView(null)}
              >
                close full screen
              </button>
            </div>

            <div className="fullscreen-content">
              {clusterView}
            </div>
          </div>
        </div>
      ) : null}
    </div>
  )
}
|
| 204 |
+
|
| 205 |
+
export default App
|
src/components/ClusterMap.tsx
ADDED
|
@@ -0,0 +1,2086 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import {
|
| 2 |
+
useApplication,
|
| 3 |
+
useExtend,
|
| 4 |
+
useTick,
|
| 5 |
+
} from '@pixi/react'
|
| 6 |
+
import {
|
| 7 |
+
Container,
|
| 8 |
+
Graphics,
|
| 9 |
+
Text,
|
| 10 |
+
Ticker,
|
| 11 |
+
type Graphics as PixiGraphics,
|
| 12 |
+
} from 'pixi.js'
|
| 13 |
+
import {
|
| 14 |
+
useCallback,
|
| 15 |
+
useEffect,
|
| 16 |
+
useMemo,
|
| 17 |
+
useRef,
|
| 18 |
+
useState,
|
| 19 |
+
type PointerEvent as ReactPointerEvent,
|
| 20 |
+
} from 'react'
|
| 21 |
+
import { PixiSurface } from './pixi/PixiSurface'
|
| 22 |
+
import {
|
| 23 |
+
buildTopologySceneModel,
|
| 24 |
+
describeTarget,
|
| 25 |
+
findHoverTarget,
|
| 26 |
+
getFitViewport,
|
| 27 |
+
worldToScreen,
|
| 28 |
+
type HoverTarget,
|
| 29 |
+
type SceneGpu,
|
| 30 |
+
type SceneNode,
|
| 31 |
+
type TargetDetails,
|
| 32 |
+
type TopologySceneModel,
|
| 33 |
+
type ViewportState,
|
| 34 |
+
} from '../lib/topologyScene'
|
| 35 |
+
import { matchesLinkedFocus, type LinkedFocus } from '../lib/linkedFocus'
|
| 36 |
+
import { type WorkbenchViewModel } from '../lib/workbenchPresenter'
|
| 37 |
+
import {
|
| 38 |
+
TOPOLOGY_LOD_POLICY,
|
| 39 |
+
getTopologyLodState,
|
| 40 |
+
mix,
|
| 41 |
+
screenStroke,
|
| 42 |
+
screenWorld,
|
| 43 |
+
type TopologyLodState,
|
| 44 |
+
} from '../lib/topologyLod'
|
| 45 |
+
|
| 46 |
+
/**
 * Props accepted by the ClusterMap component: the presenter view model, two
 * boolean mode flags (debug, snapshot) and an optional linked focus
 * (`null` when nothing is linked).
 */
type ClusterMapProps = {
|
| 47 |
+
viewModel: WorkbenchViewModel
|
| 48 |
+
debugEnabled: boolean
|
| 49 |
+
snapshotMode: boolean
|
| 50 |
+
linkedFocus: LinkedFocus | null
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
/**
 * Set of independent boolean debug switches.
 * NOTE(review): presumably each flag enables one debug overlay/readout in the
 * scene (bounds, ids, heat, hit areas, stats) - confirm against the consumer.
 */
type DebugToggles = {
|
| 54 |
+
bounds: boolean
|
| 55 |
+
ids: boolean
|
| 56 |
+
heat: boolean
|
| 57 |
+
hitAreas: boolean
|
| 58 |
+
stats: boolean
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
/**
 * A 2D pointer position.
 * NOTE(review): coordinate space (screen vs. world) is not visible here -
 * confirm at the usage site.
 */
type ScenePointer = {
|
| 62 |
+
x: number
|
| 63 |
+
y: number
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
/**
 * Lookup from an object id to a rectangle (x, y, width, height).
 * createDebugObjectMap fills it with pod, node and GPU ids mapped to the
 * result of worldToScreen() on their hit bounds.
 */
type DebugObjectMap = Record<
|
| 67 |
+
string,
|
| 68 |
+
{
|
| 69 |
+
x: number
|
| 70 |
+
y: number
|
| 71 |
+
width: number
|
| 72 |
+
height: number
|
| 73 |
+
}
|
| 74 |
+
>
|
| 75 |
+
|
| 76 |
+
// Zoom limits, taken directly from the shared topology LOD policy.
const { minScale: MIN_SCALE, maxScale: MAX_SCALE } = TOPOLOGY_LOD_POLICY
|
| 78 |
+
|
| 79 |
+
/**
 * Limits `value` to the range [min, max].
 * The lower bound is applied first, so when `min > max` the result is `max`.
 */
const clamp = (value: number, min: number, max: number) => {
  const lowerBounded = Math.max(value, min)
  return Math.min(lowerBounded, max)
}
|
| 81 |
+
|
| 82 |
+
/**
 * Allowed zoom range and allowed viewport offset range on each axis, as
 * returned by getViewportConstraints().
 */
type ViewportConstraints = {
|
| 83 |
+
minScale: number
|
| 84 |
+
maxScale: number
|
| 85 |
+
minX: number
|
| 86 |
+
maxX: number
|
| 87 |
+
minY: number
|
| 88 |
+
maxY: number
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
/**
 * Computes the zoom limits and the allowed viewport offsets for a scene
 * rendered into a `width` x `height` surface at (a clamped version of) `scale`.
 *
 * - `minScale` is the fit-to-surface scale from getFitViewport().
 * - `maxScale` is 180x the fit scale (at least fit + 0.001), clamped to
 *   [minScale, MAX_SCALE].
 * - On each axis: if the scaled scene fits inside the surface the offset is
 *   pinned to the centered position (min === max); otherwise the offset may
 *   range from `surface - scaledScene` (negative) up to 0.
 */
const getViewportConstraints = (
  model: TopologySceneModel,
  width: number,
  height: number,
  scale: number,
): ViewportConstraints => {
  const { scale: minScale } = getFitViewport(model, width, height)
  const maxScale = clamp(Math.max(minScale * 180, minScale + 0.001), minScale, MAX_SCALE)
  const safeScale = clamp(scale, minScale, maxScale)

  // Offset range [min, max] for one axis.
  const offsetRange = (surfaceExtent: number, sceneExtent: number): [number, number] => {
    const scaledExtent = sceneExtent * safeScale
    if (scaledExtent <= surfaceExtent) {
      const centered = (surfaceExtent - scaledExtent) / 2
      return [centered, centered]
    }
    return [surfaceExtent - scaledExtent, 0]
  }

  const [minX, maxX] = offsetRange(width, model.width)
  const [minY, maxY] = offsetRange(height, model.height)

  return { minScale, maxScale, minX, maxX, minY, maxY }
}
|
| 137 |
+
|
| 138 |
+
/**
 * Returns a copy of `nextViewport` whose scale and offsets respect the
 * constraints of `model` inside a `width` x `height` surface.
 *
 * When the surface has no area (`width <= 0` or `height <= 0`) the input
 * viewport is returned unchanged, because no meaningful constraints exist.
 */
const clampViewportToScene = (
  nextViewport: ViewportState,
  model: TopologySceneModel,
  width: number,
  height: number,
): ViewportState => {
  if (width <= 0 || height <= 0) {
    return nextViewport
  }

  // getViewportConstraints clamps the requested scale itself before deriving
  // the offset ranges, so one call already yields the ranges that belong to
  // the clamped scale computed below.
  const constraints = getViewportConstraints(model, width, height, nextViewport.scale)
  const scale = clamp(nextViewport.scale, constraints.minScale, constraints.maxScale)

  return {
    scale,
    x: clamp(nextViewport.x, constraints.minX, constraints.maxX),
    y: clamp(nextViewport.y, constraints.minY, constraints.maxY),
  }
}
|
| 158 |
+
|
| 159 |
+
/** Draw callback that renders nothing: it only clears the graphics object. */
const noopDraw = (graphics: PixiGraphics): void => {
  graphics.clear()
}
|
| 162 |
+
|
| 163 |
+
/**
 * Sinusoidal multiplier oscillating around 1 with amplitude `depth`.
 * The phase advances 1.8 radians per second of `timeMs` and is shifted by
 * `offset` radians.
 */
const pulse = (timeMs: number, offset: number, depth: number) => {
  const phase = timeMs / 1000 * 1.8 + offset
  return 1 + Math.sin(phase) * depth
}
|
| 165 |
+
|
| 166 |
+
/**
 * Strokes four L-shaped corner brackets around `bounds`.
 *
 * `lengthPx`, `insetPx` and `strokePx` are passed through screenStroke()
 * together with `scale` to obtain the arm length, the outward gap between
 * `bounds` and the brackets, and the line width. Brackets are drawn in the
 * order top-left, top-right, bottom-left, bottom-right, each as its own
 * stroked path.
 */
const drawCornerFocus = (
  graphics: PixiGraphics,
  bounds: { x: number; y: number; width: number; height: number },
  scale: number,
  color: number,
  alpha: number,
  lengthPx: number,
  insetPx: number,
  strokePx: number,
) => {
  const arm = screenStroke(scale, lengthPx, 0.3, 16)
  const gap = screenStroke(scale, insetPx, 0.12, 8)
  const lineWidth = screenStroke(scale, strokePx, 0.08, 2.4)

  // Bracket rectangle: `bounds` grown outward by `gap` on every side.
  const x0 = bounds.x - gap
  const y0 = bounds.y - gap
  const x1 = bounds.x + bounds.width + gap
  const y1 = bounds.y + bounds.height + gap

  // Each bracket is a three-point polyline: [startX, startY, cornerX, cornerY, endX, endY].
  const brackets: Array<[number, number, number, number, number, number]> = [
    [x0, y0 + arm, x0, y0, x0 + arm, y0],
    [x1 - arm, y0, x1, y0, x1, y0 + arm],
    [x0, y1 - arm, x0, y1, x0 + arm, y1],
    [x1 - arm, y1, x1, y1, x1, y1 - arm],
  ]

  for (const [startX, startY, cornerX, cornerY, endX, endY] of brackets) {
    graphics.moveTo(startX, startY)
    graphics.lineTo(cornerX, cornerY)
    graphics.lineTo(endX, endY)
    graphics.stroke({ color, alpha, width: lineWidth, cap: 'square', join: 'miter' })
  }
}
|
| 205 |
+
|
| 206 |
+
/**
 * Builds an id -> rectangle lookup for every pod, node and GPU in `model`,
 * where each rectangle is worldToScreen() applied to the object's
 * `hitBounds` for the given `viewport`.
 *
 * Entries are inserted pods first, then nodes, then GPUs; on an id collision
 * the later entry's value wins.
 */
function createDebugObjectMap(
  model: TopologySceneModel,
  viewport: ViewportState,
): DebugObjectMap {
  const targets = [
    ...model.pods,
    ...model.nodes,
    ...model.nodes.flatMap((node) => node.gpus),
  ]

  return Object.fromEntries(
    targets.map((target) => [target.id, worldToScreen(target.hitBounds, viewport)]),
  )
}
|
| 228 |
+
|
| 229 |
+
/**
 * Thin wrapper around screenWorld() with radius-specific default bounds
 * (`minWorld` 0.06, `maxWorld` 12); all four arguments are forwarded as-is.
 */
const screenRadius = (
  scale: number,
  pixels: number,
  minWorld = 0.06,
  maxWorld = 12,
) => {
  return screenWorld(scale, pixels, minWorld, maxWorld)
}
|
| 235 |
+
|
| 236 |
+
/** Packs four numbers into a `{ x, y, width, height }` rectangle object. */
const makeRect = (x: number, y: number, width: number, height: number) => {
  return { x: x, y: y, width: width, height: height }
}
|
| 242 |
+
|
| 243 |
+
/**
 * Returns `rect` shrunk by `insetX` on the left and right and by `insetY` on
 * the top and bottom. Width and height never drop below 0.0001, so an
 * oversized inset cannot produce a zero or negative size.
 */
const insetRect = (
  rect: { x: number; y: number; width: number; height: number },
  insetX: number,
  insetY: number,
) => {
  const shrunkWidth = Math.max(rect.width - insetX * 2, 0.0001)
  const shrunkHeight = Math.max(rect.height - insetY * 2, 0.0001)
  return makeRect(rect.x + insetX, rect.y + insetY, shrunkWidth, shrunkHeight)
}
|
| 254 |
+
|
| 255 |
+
/**
 * Rectangle covered by a `width` x `height` surface for the given viewport,
 * obtained by dividing the negated offsets and the surface size by
 * `viewport.scale`, then grown by `paddingWorld` on every side.
 */
const getWorldViewportBounds = (
  viewport: ViewportState,
  width: number,
  height: number,
  paddingWorld: number,
) => {
  const { x, y, scale } = viewport
  const left = -x / scale - paddingWorld
  const top = -y / scale - paddingWorld
  return makeRect(
    left,
    top,
    width / scale + paddingWorld * 2,
    height / scale + paddingWorld * 2,
  )
}
|
| 267 |
+
|
| 268 |
+
/**
 * Axis-aligned rectangle overlap test. Comparisons are inclusive, so
 * rectangles that merely share an edge or a corner count as intersecting.
 */
const rectsIntersect = (
  left: { x: number; y: number; width: number; height: number },
  right: { x: number; y: number; width: number; height: number },
) => {
  const overlapsOnX =
    left.x <= right.x + right.width && left.x + left.width >= right.x
  const overlapsOnY =
    left.y <= right.y + right.height && left.y + left.height >= right.y
  return overlapsOnX && overlapsOnY
}
|
| 276 |
+
|
| 277 |
+
/**
 * Bounding rectangle of the segment (x1, y1) -> (x2, y2), expanded by `pad`
 * on every side.
 */
const lineBounds = (
  x1: number,
  y1: number,
  x2: number,
  y2: number,
  pad: number,
) => {
  const left = Math.min(x1, x2) - pad
  const top = Math.min(y1, y2) - pad
  const spanX = Math.abs(x2 - x1) + pad * 2
  const spanY = Math.abs(y2 - y1) + pad * 2
  return makeRect(left, top, spanX, spanY)
}
|
| 290 |
+
|
| 291 |
+
/**
 * Paints a single GPU module into `graphics`, adding detail tiers as the module's
 * on-screen footprint and the level-of-detail weights allow.
 *
 * Drawing order: optional activity halo, outer shell, then either a minimal "signal" chip
 * (very small footprint), a single core block (small footprint), or the full stack of
 * carrier / cold plate / connector pads / overview glyph / board / package / silicon /
 * micro tiers.
 *
 * @param graphics Pixi graphics context that receives the draw calls.
 * @param gpu Scene GPU; reads `lodFrame`, `active` and `utilization`.
 * @param scale Viewport scale used to project world sizes to screen sizes.
 * @param linked Whether the module is part of the linked selection (changes the outline).
 * @param lod Level-of-detail state; only `lod.weights` is read.
 * @param emphasis Multiplier applied to the alpha of everything drawn here.
 */
function drawModule(
  graphics: PixiGraphics,
  gpu: SceneGpu,
  scale: number,
  linked: boolean,
  lod: TopologyLodState,
  emphasis: number,
) {
  const { active } = gpu
  const { weights } = lod
  const frame = gpu.lodFrame
  const screenWidth = frame.width * scale
  const screenHeight = frame.height * scale
  const load = active ? mix(0.42, 1, gpu.utilization) : 0
  const isLive = load > 0.001

  // Nested frames, outermost first; each one is derived from its parent via insetRect
  // with margins expressed as a fraction of the parent's size.
  const shell = insetRect(frame, frame.width * 0.04, frame.height * 0.06)
  const carrier = insetRect(shell, shell.width * 0.05, shell.height * 0.08)
  const plate = insetRect(carrier, carrier.width * 0.14, carrier.height * 0.18)
  const pkg = insetRect(plate, plate.width * 0.1, plate.height * 0.13)
  const substrate = insetRect(pkg, pkg.width * 0.06, pkg.height * 0.1)
  const interposer = insetRect(substrate, substrate.width * 0.1, substrate.height * 0.14)
  const die = insetRect(interposer, interposer.width * 0.2, interposer.height * 0.2)
  const grid = insetRect(die, die.width * 0.04, die.height * 0.05)
  const padStrip = makeRect(
    shell.x + shell.width * 0.24,
    shell.y + shell.height * 0.82,
    shell.width * 0.52,
    shell.height * 0.08,
  )

  const outlineColor = linked ? 0xffefc0 : 0xcfdbe2

  // Each tier's strength is its own weight minus a share of the next finer tier's weight,
  // clamped at zero.
  const overviewMix = Math.max(weights.overview - weights.board * 0.18, 0)
  const boardMix = Math.max(weights.board - weights.package * 0.42, 0)
  const packageMix = Math.max(weights.package - weights.silicon * 0.52, 0)
  const siliconMix = Math.max(weights.silicon - weights.micro * 0.4, 0)
  const microMix = weights.micro
  const boardPresence = Math.max(weights.board, weights.package * 0.84, weights.silicon * 0.66)
  const platePresence = Math.max(boardMix * 0.7, packageMix * 0.88, siliconMix * 0.9, microMix * 0.8)

  const shellOpacity = active ? mix(0.84, 0.96, boardPresence) : mix(0.42, 0.56, boardPresence)
  const outlineAlpha = emphasis * (linked ? 0.92 : 0.56)
  const outlineWidth = screenStroke(scale, linked ? 1.25 : 0.9, 0.08, 0.95)
  const hairline = screenStroke(scale, 0.6, 0.03, 0.5)
  const shellRadius = screenRadius(scale, 8, 0.18, 2.6)
  const innerRadius = screenRadius(scale, 5, 0.16, 2)
  const dieRadius = screenRadius(scale, 4, 0.14, 1.5)

  // Visibility gates: a tier is drawn only when it is both weighted in and large enough on screen.
  const dieScreenWidth = die.width * scale
  const showCarrier = screenWidth >= 10 && screenHeight >= 8
  const showPlate = screenWidth >= 14 && screenHeight >= 10
  const showGlyph = overviewMix > 0.02 && screenWidth >= 10
  const showPads = (overviewMix > 0.02 || boardMix > 0.02) && screenWidth >= 15
  const showBoard = boardMix > 0.03 && screenWidth >= 18
  const showPackage = packageMix > 0.04 && screenWidth >= 30
  const showSilicon = siliconMix > 0.05 && dieScreenWidth >= 26
  const showMicro = microMix > 0.06 && dieScreenWidth >= 72

  const halo = makeRect(
    shell.x - frame.width * 0.035,
    shell.y - frame.height * 0.05,
    shell.width + frame.width * 0.07,
    shell.height + frame.height * 0.1,
  )

  // Activity halo behind the shell; stronger when the module is small on screen.
  if (isLive) {
    const compact = screenWidth < 18
    graphics
      .roundRect(halo.x, halo.y, halo.width, halo.height, screenRadius(scale, 10, 0.22, 3))
      .fill({
        color: 0x59e7d2,
        alpha: emphasis * mix(compact ? 0.08 : 0.04, compact ? 0.2 : 0.1, load),
      })
  }

  // Outer shell: always drawn.
  graphics
    .roundRect(shell.x, shell.y, shell.width, shell.height, shellRadius)
    .fill({ color: active ? 0x0d1f29 : 0x0b1821, alpha: shellOpacity * emphasis })
    .stroke({ color: outlineColor, alpha: outlineAlpha, width: outlineWidth })

  // Very small footprint: shell plus (for live modules) a centred signal chip, nothing else.
  if (screenWidth < 8 || screenHeight < 6) {
    if (isLive) {
      const widthCap = shell.width * 0.54
      const heightCap = shell.height * 0.34
      const chipWidth = Math.min(widthCap, screenWorld(scale, 5.6, 0.14, shell.width * 0.54))
      const chipHeight = Math.min(heightCap, screenWorld(scale, 2.8, 0.1, shell.height * 0.34))
      const chipX = shell.x + (shell.width - chipWidth) / 2
      const chipY = shell.y + (shell.height - chipHeight) / 2

      graphics
        .roundRect(chipX, chipY, chipWidth, chipHeight, screenRadius(scale, 2.2, 0.05, 0.34))
        .fill({ color: 0x76f1df, alpha: emphasis * mix(0.68, 1, load) })
    }

    return
  }

  // Small footprint: shell plus one core block.
  if (screenWidth < 15 || screenHeight < 10) {
    const core = insetRect(shell, shell.width * 0.3, shell.height * 0.28)
    const coreAlpha = active ? mix(0.6, 0.95, load) : 0.36
    graphics
      .roundRect(core.x, core.y, core.width, core.height, screenRadius(scale, 1.8, 0.04, 0.4))
      .fill({ color: active ? 0x6ce9d7 : 0x193843, alpha: emphasis * coreAlpha })
    return
  }

  if (showCarrier) {
    graphics
      .roundRect(carrier.x, carrier.y, carrier.width, carrier.height, innerRadius)
      .fill({
        color: active ? 0x112833 : 0x10202a,
        alpha: mix(0.56, 0.82, boardPresence) * emphasis,
      })
  }

  if (showPlate) {
    graphics
      .roundRect(plate.x, plate.y, plate.width, plate.height, screenRadius(scale, 4.5, 0.12, 1.8))
      .fill({
        color: 0x163643,
        alpha: mix(0.02, 0.34, platePresence) * emphasis * Math.max(1 - microMix * 0.24, 0.76),
      })
  }

  // Row of six connector pads, centred inside the pad strip.
  if (showPads) {
    const padAlpha = Math.max(overviewMix * 0.8, boardMix * 0.55) * emphasis * (active ? 0.84 : 0.36)
    const padCount = 6
    const padWidth = padStrip.width * 0.11
    const padGap = padStrip.width * 0.05
    const rowWidth = padCount * padWidth + (padCount - 1) * padGap
    const firstPadX = padStrip.x + (padStrip.width - rowWidth) / 2
    const padPitch = padWidth + padGap

    for (let slot = 0; slot < padCount; slot += 1) {
      graphics
        .roundRect(
          firstPadX + slot * padPitch,
          padStrip.y,
          padWidth,
          padStrip.height,
          screenRadius(scale, 2, 0.04, 0.6),
        )
        .fill({ color: 0xd6ba72, alpha: padAlpha })
    }
  }

  // Overview glyph: a module window, two side bars, and a dark die window on top.
  if (showGlyph) {
    const glyphGain = overviewMix * emphasis
    const pane = insetRect(carrier, carrier.width * 0.24, carrier.height * 0.26)
    const diePane = makeRect(
      pane.x + pane.width * 0.31,
      pane.y + pane.height * 0.26,
      pane.width * 0.38,
      pane.height * 0.48,
    )

    graphics
      .roundRect(pane.x, pane.y, pane.width, pane.height, screenRadius(scale, 2.8, 0.06, 0.9))
      .fill({
        color: active ? 0x235560 : 0x1a3d48,
        alpha: glyphGain * mix(active ? 0.5 : 0.42, active ? 0.82 : 0.42, load),
      })

    for (const offset of [0.14, 0.76]) {
      graphics
        .roundRect(
          pane.x + pane.width * offset,
          pane.y + pane.height * 0.28,
          pane.width * 0.08,
          pane.height * 0.44,
          screenRadius(scale, 1.3, 0.03, 0.35),
        )
        .fill({
          color: active ? 0xdaf08e : 0xcddd73,
          alpha: glyphGain * mix(active ? 0.8 : 0.62, 1, load * 0.7),
        })
    }

    graphics
      .roundRect(diePane.x, diePane.y, diePane.width, diePane.height, screenRadius(scale, 1.7, 0.03, 0.42))
      .fill({
        color: active ? 0x0b1820 : 0x081219,
        alpha: glyphGain * mix(active ? 0.92 : 0.86, 1, load * 0.4),
      })
  }

  // Board tier: cold-plate outline, four mount points, and a live zone for active modules.
  if (showBoard) {
    const boardGain = boardMix * emphasis

    graphics
      .roundRect(plate.x, plate.y, plate.width, plate.height, screenRadius(scale, 4.5, 0.1, 1.2))
      .stroke({ color: 0x88b9c6, alpha: boardGain * 0.34, width: hairline })

    const mountRadius = screenWorld(scale, 2.6, 0.03, 0.26)
    const mountAlpha = boardGain * (active ? 0.32 : 0.14)
    // Top pair first, then bottom pair; left before right within each pair.
    for (const rowOffset of [0.2, 0.74]) {
      for (const columnOffset of [0.16, 0.84]) {
        graphics
          .circle(carrier.x + carrier.width * columnOffset, carrier.y + carrier.height * rowOffset, mountRadius)
          .fill({ color: 0x8ab7b7, alpha: mountAlpha })
      }
    }

    if (isLive) {
      const hotZone = insetRect(plate, plate.width * 0.3, plate.height * 0.28)
      graphics
        .roundRect(hotZone.x, hotZone.y, hotZone.width, hotZone.height, screenRadius(scale, 3, 0.06, 0.8))
        .fill({ color: 0x64e6d4, alpha: boardGain * mix(0.12, 0.28, load) })
    }
  }

  // Package tier: package outline, substrate, interposer, two rows of four memory stacks, die.
  if (showPackage) {
    const packageGain = packageMix * emphasis

    graphics
      .roundRect(pkg.x, pkg.y, pkg.width, pkg.height, innerRadius)
      .stroke({ color: 0xb7c7cd, alpha: packageGain * 0.8, width: hairline })

    graphics
      .roundRect(substrate.x, substrate.y, substrate.width, substrate.height, innerRadius)
      .fill({ color: 0x294546, alpha: packageGain * 0.34 })

    graphics
      .roundRect(interposer.x, interposer.y, interposer.width, interposer.height, innerRadius)
      .fill({ color: 0x2a5960, alpha: packageGain * 0.3 })
      .stroke({ color: 0x9deedb, alpha: packageGain * 0.18, width: hairline })

    const stackWidth = interposer.width * 0.18
    const stackHeight = interposer.height * 0.16
    const stackOrigin = interposer.x + interposer.width * 0.04
    const stackPitch = stackWidth + interposer.width * 0.03
    for (let slot = 0; slot < 4; slot += 1) {
      const stackX = stackOrigin + slot * stackPitch
      for (const rowOffset of [0.09, 0.75]) {
        graphics
          .roundRect(
            stackX,
            interposer.y + interposer.height * rowOffset,
            stackWidth,
            stackHeight,
            screenRadius(scale, 2, 0.04, 0.45),
          )
          .fill({ color: 0xcfd86f, alpha: packageGain * 0.7 })
      }
    }

    graphics
      .roundRect(die.x, die.y, die.width, die.height, dieRadius)
      .fill({ color: 0x09161d, alpha: packageGain * 0.76 })
      .stroke({ color: 0x8bdacd, alpha: packageGain * 0.24, width: hairline })
  }

  // Silicon tier: die backdrop, a 7 x 5 tile grid, and two side blocks.
  if (showSilicon) {
    const siliconGain = siliconMix * emphasis

    graphics
      .roundRect(die.x, die.y, die.width, die.height, dieRadius)
      .fill({ color: 0x0c1c22, alpha: siliconGain * 0.58 })

    const tileColumns = 7
    const tileRows = 5
    const tileWidth = grid.width / tileColumns
    const tileHeight = grid.height / tileRows
    for (let row = 0; row < tileRows; row += 1) {
      const tileY = grid.y + row * tileHeight
      const borderRow = row === 0 || row === tileRows - 1
      for (let column = 0; column < tileColumns; column += 1) {
        const tileX = grid.x + column * tileWidth
        const leadColumn = column === 0
        let tileColor = 0xb8ece2
        if (leadColumn) {
          tileColor = 0xa2d8ec
        } else if (borderRow) {
          tileColor = 0x7fb7ca
        }

        graphics
          .roundRect(
            tileX + tileWidth * 0.08,
            tileY + tileHeight * 0.12,
            tileWidth * 0.8,
            tileHeight * 0.72,
            screenRadius(scale, 1.2, 0.03, 0.26),
          )
          .fill({ color: tileColor, alpha: siliconGain * (leadColumn ? 0.22 : 0.14) })
      }
    }

    const sideBlocks = [
      makeRect(die.x + die.width * 0.06, die.y + die.height * 0.18, die.width * 0.14, die.height * 0.64),
      makeRect(die.x + die.width * 0.78, die.y + die.height * 0.26, die.width * 0.1, die.height * 0.48),
    ]
    for (const side of sideBlocks) {
      graphics
        .roundRect(side.x, side.y, side.width, side.height, screenRadius(scale, 1.2, 0.03, 0.3))
        .fill({ color: 0xaee6ff, alpha: siliconGain * 0.14 })
    }
  }

  // Micro tier: a 38 x 24 cell grid; colour depends on edge band, seam lines, then a diagonal pattern.
  if (showMicro) {
    const cellColumns = 38
    const cellRows = 24
    const cellWidth = grid.width / cellColumns
    const cellHeight = grid.height / cellRows
    const cellAlpha = microMix * emphasis * 0.22

    // Edge band wins over seams; seams win over the diagonal accents.
    const pickCellColor = (inEdgeBand: boolean, onSeam: boolean, diagonal: number) => {
      if (inEdgeBand) {
        return 0x79afbd
      }
      if (onSeam) {
        return 0x91d2dc
      }
      if (diagonal % 5 === 0) {
        return 0xc7fff0
      }
      return diagonal % 3 === 0 ? 0x94d9ef : 0xafe9dc
    }

    for (let row = 0; row < cellRows; row += 1) {
      const cellY = grid.y + row * cellHeight
      for (let column = 0; column < cellColumns; column += 1) {
        const cellX = grid.x + column * cellWidth
        const inEdgeBand = column < 4 || column > cellColumns - 5 || row < 2 || row > cellRows - 3
        const onSeam = column % 6 === 0 || row % 5 === 0

        graphics
          .roundRect(
            cellX + cellWidth * 0.12,
            cellY + cellHeight * 0.16,
            cellWidth * 0.72,
            cellHeight * 0.56,
            screenRadius(scale, 0.18, 0.002, 0.05),
          )
          .fill({
            color: pickCellColor(inEdgeBand, onSeam, row + column),
            alpha: cellAlpha * (onSeam ? 0.58 : 1),
          })
      }
    }
  }
}
|
| 687 |
+
|
| 688 |
+
/**
 * Draws the tray for one node: a filled, outlined rounded rectangle, an optional inner
 * shell once it is opaque enough, and an optional heat overlay.
 *
 * @param graphics Pixi graphics context that receives the draw calls.
 * @param node Scene node; reads `x`, `y`, `width`, `height` and `interNodeLoad`.
 * @param scale Viewport scale forwarded to the screen-space radius/stroke helpers.
 * @param linked Whether the node is part of the linked selection (changes the outline).
 * @param heatEnabled Whether to paint the load-tinted heat overlay.
 * @param lod Level-of-detail state; only `lod.weights.board` is read.
 * @param emphasis Multiplier applied to the inner shell, unlinked outline and heat alphas.
 */
function drawNodeShell(
  graphics: PixiGraphics,
  node: SceneNode,
  scale: number,
  linked: boolean,
  heatEnabled: boolean,
  lod: TopologyLodState,
  emphasis: number,
) {
  const { x, y, width, height } = node
  const boardWeight = lod.weights.board
  const innerAlpha = mix(0.04, 0.14, boardWeight) * emphasis
  const unlinkedOutlineAlpha = mix(0.08, 0.22, boardWeight) * emphasis
  const trayRadius = screenRadius(scale, 18, 0.8, 10)

  // Base tray. The fill alpha is fixed; the outline alpha ignores emphasis while linked.
  const outlineColor = linked ? 0xffdc8a : 0x6fd9cd
  const outlineAlpha = linked ? 0.82 : unlinkedOutlineAlpha
  graphics
    .roundRect(x, y, width, height, trayRadius)
    .fill({ color: 0x09131b, alpha: 0.86 })
    .stroke({
      color: outlineColor,
      alpha: outlineAlpha,
      width: screenStroke(scale, linked ? 1.2 : 0.7, 0.08, 0.85),
    })

  // Inner shell, inset 2.5 units per side, skipped while it would be near-invisible.
  if (innerAlpha > 0.02) {
    graphics
      .roundRect(x + 2.5, y + 2.5, width - 5, height - 5, screenRadius(scale, 14, 0.6, 8))
      .fill({ color: 0x0b1720, alpha: innerAlpha })
  }

  if (!heatEnabled) {
    return
  }

  // Heat overlay, inset 6 units per side, tinted by the node's inter-node load.
  graphics
    .roundRect(x + 6, y + 6, width - 12, height - 12, 8)
    .fill({ color: 0xe58a43, alpha: node.interNodeLoad * 0.08 * emphasis })
}
|
| 731 |
+
|
| 732 |
+
/**
 * Draws the campus-level pod layer: one faint fabric spine per row and per column of the
 * pod grid, followed by a filled and outlined plate for every pod in `visiblePods`.
 *
 * @param graphics Pixi graphics context that receives the draw calls.
 * @param model Scene model; `pods` is indexed as a row-major grid that is `podColumns`
 *   wide and `podRows` tall.
 * @param scale Viewport scale forwarded to the screen-space radius/stroke helpers.
 * @param lod Level-of-detail state; the overview and board weights drive the alphas.
 * @param visiblePods Pods that receive a plate (spines always use the full `model.pods`).
 * @param podEmphasis Returns the emphasis multiplier for a pod id.
 */
function drawCampusPods(
  graphics: PixiGraphics,
  model: TopologySceneModel,
  scale: number,
  lod: TopologyLodState,
  visiblePods: typeof model.pods,
  podEmphasis: (podId: string) => number,
) {
  const { pods, podRows, podColumns } = model
  const spineAlpha = mix(0.02, 0.08, lod.weights.overview)

  // Strokes a straight spine from the first to the last pod of a lane (a grid row or column).
  // Lanes with fewer than two pods have nothing to connect and are skipped.
  const strokeSpine = (lane: typeof model.pods, strokePx: number, strokeCap: number) => {
    if (lane.length < 2) {
      return
    }

    const head = lane[0]
    const tail = lane[lane.length - 1]
    graphics
      .moveTo(head.centerX, head.centerY)
      .lineTo(tail.centerX, tail.centerY)
      .stroke({
        color: 0xf1b067,
        // The spine is only as prominent as its less-emphasised end.
        alpha: spineAlpha * Math.min(podEmphasis(head.id), podEmphasis(tail.id)),
        width: screenStroke(scale, strokePx, 0.12, strokeCap),
      })
  }

  for (let rowIndex = 0; rowIndex < podRows; rowIndex += 1) {
    const rowStart = rowIndex * podColumns
    strokeSpine(pods.slice(rowStart, rowStart + podColumns), 2.4, 2.2)
  }

  for (let columnIndex = 0; columnIndex < podColumns; columnIndex += 1) {
    strokeSpine(
      pods.filter((_, position) => position % podColumns === columnIndex),
      2.1,
      2,
    )
  }

  const innerOutlineAlpha = mix(0.02, 0.08, lod.weights.board)
  const showInnerOutline = innerOutlineAlpha > 0.02

  for (const pod of visiblePods) {
    const gain = podEmphasis(pod.id)
    const isActive = pod.active

    graphics
      .roundRect(pod.x, pod.y, pod.width, pod.height, screenRadius(scale, 22, 1.2, 18))
      .fill({
        color: 0x08131c,
        alpha: mix(isActive ? 0.76 : 0.66, isActive ? 0.88 : 0.8, lod.weights.board) * gain,
      })
      .stroke({
        color: isActive ? 0xe6dbb1 : 0x5ecfca,
        alpha: (isActive ? 0.34 : 0.14) * gain,
        width: screenStroke(scale, isActive ? 1.3 : 0.8, 0.08, 1),
      })

    if (!showInnerOutline) {
      continue
    }

    // Inner outline, inset 8 units per side.
    graphics
      .roundRect(pod.x + 8, pod.y + 8, pod.width - 16, pod.height - 16, screenRadius(scale, 18, 0.8, 14))
      .stroke({
        color: 0x6fd9cd,
        alpha: innerOutlineAlpha * gain,
        width: screenStroke(scale, 0.45, 0.04, 0.5),
      })
  }
}
|
| 809 |
+
|
| 810 |
+
function TopologyScene({
|
| 811 |
+
model,
|
| 812 |
+
viewport,
|
| 813 |
+
surfaceSize,
|
| 814 |
+
hoveredTarget,
|
| 815 |
+
pinnedTarget,
|
| 816 |
+
linkedFocus,
|
| 817 |
+
linkedGpuIds,
|
| 818 |
+
linkedNodeIds,
|
| 819 |
+
linkedPodIds,
|
| 820 |
+
debugEnabled,
|
| 821 |
+
snapshotMode,
|
| 822 |
+
debugToggles,
|
| 823 |
+
onFpsChange,
|
| 824 |
+
}: {
|
| 825 |
+
model: TopologySceneModel
|
| 826 |
+
viewport: ViewportState
|
| 827 |
+
surfaceSize: { width: number; height: number }
|
| 828 |
+
hoveredTarget: HoverTarget | null
|
| 829 |
+
pinnedTarget: HoverTarget | null
|
| 830 |
+
linkedFocus: LinkedFocus | null
|
| 831 |
+
linkedGpuIds: Set<string>
|
| 832 |
+
linkedNodeIds: Set<string>
|
| 833 |
+
linkedPodIds: Set<string>
|
| 834 |
+
debugEnabled: boolean
|
| 835 |
+
snapshotMode: boolean
|
| 836 |
+
debugToggles: DebugToggles
|
| 837 |
+
onFpsChange: (value: number) => void
|
| 838 |
+
}) {
|
| 839 |
+
useExtend({ Container, Graphics, Text })
|
| 840 |
+
const { app } = useApplication()
|
| 841 |
+
const dynamicRef = useRef<PixiGraphics | null>(null)
|
| 842 |
+
const hoverRef = useRef<HoverTarget | null>(hoveredTarget)
|
| 843 |
+
const pinnedRef = useRef<HoverTarget | null>(pinnedTarget)
|
| 844 |
+
const statsRef = useRef({ elapsed: 0, frames: 0 })
|
| 845 |
+
const allGpus = useMemo(
|
| 846 |
+
() => model.nodes.flatMap((node) => node.gpus),
|
| 847 |
+
[model.nodes],
|
| 848 |
+
)
|
| 849 |
+
const gpuById = useMemo(() => new Map(allGpus.map((gpu) => [gpu.id, gpu])), [allGpus])
|
| 850 |
+
const nodeById = useMemo(() => new Map(model.nodes.map((node) => [node.id, node])), [model.nodes])
|
| 851 |
+
const podById = useMemo(() => new Map(model.pods.map((pod) => [pod.id, pod])), [model.pods])
|
| 852 |
+
const lodState = useMemo(() => getTopologyLodState(viewport.scale), [viewport.scale])
|
| 853 |
+
const worldViewportBounds = useMemo(
|
| 854 |
+
() =>
|
| 855 |
+
getWorldViewportBounds(
|
| 856 |
+
viewport,
|
| 857 |
+
surfaceSize.width,
|
| 858 |
+
surfaceSize.height,
|
| 859 |
+
screenWorld(viewport.scale, 180, 12, 240),
|
| 860 |
+
),
|
| 861 |
+
[surfaceSize.height, surfaceSize.width, viewport],
|
| 862 |
+
)
|
| 863 |
+
const visiblePods = useMemo(
|
| 864 |
+
() => model.pods.filter((pod) => rectsIntersect(pod.hitBounds, worldViewportBounds)),
|
| 865 |
+
[model.pods, worldViewportBounds],
|
| 866 |
+
)
|
| 867 |
+
const visibleNodes = useMemo(
|
| 868 |
+
() => model.nodes.filter((node) => rectsIntersect(node.hitBounds, worldViewportBounds)),
|
| 869 |
+
[model.nodes, worldViewportBounds],
|
| 870 |
+
)
|
| 871 |
+
const visibleGpus = useMemo(
|
| 872 |
+
() => visibleNodes.flatMap((node) => node.gpus),
|
| 873 |
+
[visibleNodes],
|
| 874 |
+
)
|
| 875 |
+
const visibleLinks = useMemo(
|
| 876 |
+
() => ({
|
| 877 |
+
row: model.rowLinks.filter((link) =>
|
| 878 |
+
rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
|
| 879 |
+
),
|
| 880 |
+
column: model.columnLinks.filter((link) =>
|
| 881 |
+
rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
|
| 882 |
+
),
|
| 883 |
+
bus: model.busLinks.filter((link) =>
|
| 884 |
+
rectsIntersect(lineBounds(link.x1, link.y1, link.x2, link.y2, link.hitWidth), worldViewportBounds),
|
| 885 |
+
),
|
| 886 |
+
}),
|
| 887 |
+
[model.busLinks, model.columnLinks, model.rowLinks, worldViewportBounds],
|
| 888 |
+
)
|
| 889 |
+
const visibleLinkCount = useMemo(
|
| 890 |
+
() => visibleLinks.row.length + visibleLinks.column.length + visibleLinks.bus.length,
|
| 891 |
+
[visibleLinks.bus.length, visibleLinks.column.length, visibleLinks.row.length],
|
| 892 |
+
)
|
| 893 |
+
|
| 894 |
+
useEffect(() => {
|
| 895 |
+
hoverRef.current = hoveredTarget
|
| 896 |
+
}, [hoveredTarget])
|
| 897 |
+
|
| 898 |
+
useEffect(() => {
|
| 899 |
+
pinnedRef.current = pinnedTarget
|
| 900 |
+
}, [pinnedTarget])
|
| 901 |
+
|
| 902 |
+
useEffect(() => {
|
| 903 |
+
if (debugEnabled || snapshotMode) {
|
| 904 |
+
window.__PIXI_TOPOLOGY_APP__ = app
|
| 905 |
+
return () => {
|
| 906 |
+
delete window.__PIXI_TOPOLOGY_APP__
|
| 907 |
+
}
|
| 908 |
+
}
|
| 909 |
+
|
| 910 |
+
return undefined
|
| 911 |
+
}, [app, debugEnabled, snapshotMode])
|
| 912 |
+
|
| 913 |
+
const getEmphasis = useCallback(
|
| 914 |
+
(kind: 'pod' | 'node' | 'gpu', id: string) => {
|
| 915 |
+
const focusTarget = pinnedRef.current ?? hoverRef.current
|
| 916 |
+
const base = 1
|
| 917 |
+
if (!focusTarget || lodState.deepIsolation <= 0.001) {
|
| 918 |
+
return base
|
| 919 |
+
}
|
| 920 |
+
|
| 921 |
+
const fadeTo = mix(1, 0.08, lodState.deepIsolation)
|
| 922 |
+
if (kind === 'gpu') {
|
| 923 |
+
if (focusTarget.kind === 'gpu') {
|
| 924 |
+
const gpu = gpuById.get(id)
|
| 925 |
+
const focusedGpu = gpuById.get(focusTarget.id)
|
| 926 |
+
if (!gpu || !focusedGpu) {
|
| 927 |
+
return fadeTo
|
| 928 |
+
}
|
| 929 |
+
if (gpu.id === focusedGpu.id) {
|
| 930 |
+
return 1
|
| 931 |
+
}
|
| 932 |
+
if (gpu.nodeId === focusedGpu.nodeId) {
|
| 933 |
+
return mix(1, 0.34, lodState.deepIsolation)
|
| 934 |
+
}
|
| 935 |
+
if (gpu.domainIndex === focusedGpu.domainIndex) {
|
| 936 |
+
return mix(1, 0.16, lodState.deepIsolation)
|
| 937 |
+
}
|
| 938 |
+
return fadeTo
|
| 939 |
+
}
|
| 940 |
+
|
| 941 |
+
if (focusTarget.kind === 'node') {
|
| 942 |
+
const gpu = gpuById.get(id)
|
| 943 |
+
const focusedNode = nodeById.get(focusTarget.id)
|
| 944 |
+
if (!gpu || !focusedNode) {
|
| 945 |
+
return fadeTo
|
| 946 |
+
}
|
| 947 |
+
if (gpu.nodeId === focusedNode.id) {
|
| 948 |
+
return mix(1, 0.9, lodState.deepIsolation * 0.2)
|
| 949 |
+
}
|
| 950 |
+
if (gpu.domainIndex === focusedNode.domainIndex) {
|
| 951 |
+
return mix(1, 0.18, lodState.deepIsolation)
|
| 952 |
+
}
|
| 953 |
+
return fadeTo
|
| 954 |
+
}
|
| 955 |
+
|
| 956 |
+
const gpu = gpuById.get(id)
|
| 957 |
+
const focusedPod = podById.get(focusTarget.id)
|
| 958 |
+
if (!gpu || !focusedPod) {
|
| 959 |
+
return fadeTo
|
| 960 |
+
}
|
| 961 |
+
return gpu.domainIndex === focusedPod.index ? mix(1, 0.72, lodState.deepIsolation * 0.3) : fadeTo
|
| 962 |
+
}
|
| 963 |
+
|
| 964 |
+
if (kind === 'node') {
|
| 965 |
+
const node = nodeById.get(id)
|
| 966 |
+
if (!node) {
|
| 967 |
+
return fadeTo
|
| 968 |
+
}
|
| 969 |
+
if (focusTarget.kind === 'gpu') {
|
| 970 |
+
const gpu = gpuById.get(focusTarget.id)
|
| 971 |
+
if (!gpu) {
|
| 972 |
+
return fadeTo
|
| 973 |
+
}
|
| 974 |
+
if (node.id === gpu.nodeId) {
|
| 975 |
+
return mix(1, 0.5, lodState.deepIsolation)
|
| 976 |
+
}
|
| 977 |
+
if (node.domainIndex === gpu.domainIndex) {
|
| 978 |
+
return mix(1, 0.18, lodState.deepIsolation)
|
| 979 |
+
}
|
| 980 |
+
return fadeTo
|
| 981 |
+
}
|
| 982 |
+
|
| 983 |
+
if (focusTarget.kind === 'node') {
|
| 984 |
+
const focusedNode = nodeById.get(focusTarget.id)
|
| 985 |
+
if (!focusedNode) {
|
| 986 |
+
return fadeTo
|
| 987 |
+
}
|
| 988 |
+
if (node.id === focusedNode.id) {
|
| 989 |
+
return 1
|
| 990 |
+
}
|
| 991 |
+
if (node.domainIndex === focusedNode.domainIndex) {
|
| 992 |
+
return mix(1, 0.2, lodState.deepIsolation)
|
| 993 |
+
}
|
| 994 |
+
return fadeTo
|
| 995 |
+
}
|
| 996 |
+
|
| 997 |
+
const focusedPod = podById.get(focusTarget.id)
|
| 998 |
+
if (!focusedPod) {
|
| 999 |
+
return fadeTo
|
| 1000 |
+
}
|
| 1001 |
+
return node.domainIndex === focusedPod.index ? mix(1, 0.3, lodState.deepIsolation) : fadeTo
|
| 1002 |
+
}
|
| 1003 |
+
|
| 1004 |
+
const pod = podById.get(id)
|
| 1005 |
+
if (!pod) {
|
| 1006 |
+
return fadeTo
|
| 1007 |
+
}
|
| 1008 |
+
if (focusTarget.kind === 'gpu') {
|
| 1009 |
+
const gpu = gpuById.get(focusTarget.id)
|
| 1010 |
+
return gpu && gpu.domainIndex === pod.index ? mix(1, 0.25, lodState.deepIsolation) : fadeTo
|
| 1011 |
+
}
|
| 1012 |
+
if (focusTarget.kind === 'node') {
|
| 1013 |
+
const node = nodeById.get(focusTarget.id)
|
| 1014 |
+
return node && node.domainIndex === pod.index ? mix(1, 0.32, lodState.deepIsolation) : fadeTo
|
| 1015 |
+
}
|
| 1016 |
+
return focusTarget.id === id ? 1 : fadeTo
|
| 1017 |
+
},
|
| 1018 |
+
[gpuById, lodState.deepIsolation, nodeById, podById],
|
| 1019 |
+
)
|
| 1020 |
+
|
| 1021 |
+
const drawStatic = useCallback(
|
| 1022 |
+
(graphics: PixiGraphics) => {
|
| 1023 |
+
graphics.clear()
|
| 1024 |
+
|
| 1025 |
+
drawCampusPods(graphics, model, viewport.scale, lodState, visiblePods, (podId) =>
|
| 1026 |
+
getEmphasis('pod', podId),
|
| 1027 |
+
)
|
| 1028 |
+
|
| 1029 |
+
const localStructurePresence = Math.max(
|
| 1030 |
+
lodState.weights.board,
|
| 1031 |
+
lodState.weights.package * 0.9,
|
| 1032 |
+
lodState.weights.silicon * 0.7,
|
| 1033 |
+
lodState.weights.micro * 0.45,
|
| 1034 |
+
)
|
| 1035 |
+
const connectorAlpha = 0.18 * localStructurePresence
|
| 1036 |
+
const linkPresence = mix(lodState.weights.overview * 0.35, 1, lodState.weights.board)
|
| 1037 |
+
const hubRadius = screenWorld(viewport.scale, 6, 0.1, 2.4)
|
| 1038 |
+
|
| 1039 |
+
const drawStaticLink = (link: (typeof model.rowLinks)[number]) => {
|
| 1040 |
+
const isRackScope = link.scope === 'rack'
|
| 1041 |
+
if (!isRackScope && localStructurePresence < 0.08) {
|
| 1042 |
+
return
|
| 1043 |
+
}
|
| 1044 |
+
|
| 1045 |
+
const rackFrom =
|
| 1046 |
+
isRackScope
|
| 1047 |
+
? model.pods.find((pod) => pod.centerX === link.x1 && pod.centerY === link.y1)
|
| 1048 |
+
: null
|
| 1049 |
+
const rackTo =
|
| 1050 |
+
isRackScope
|
| 1051 |
+
? model.pods.find((pod) => pod.centerX === link.x2 && pod.centerY === link.y2)
|
| 1052 |
+
: null
|
| 1053 |
+
const emphasis =
|
| 1054 |
+
isRackScope
|
| 1055 |
+
? Math.min(
|
| 1056 |
+
rackFrom ? getEmphasis('pod', rackFrom.id) : 1,
|
| 1057 |
+
rackTo ? getEmphasis('pod', rackTo.id) : 1,
|
| 1058 |
+
)
|
| 1059 |
+
: 1
|
| 1060 |
+
graphics
|
| 1061 |
+
.moveTo(link.x1, link.y1)
|
| 1062 |
+
.lineTo(link.x2, link.y2)
|
| 1063 |
+
.stroke({
|
| 1064 |
+
color: link.color,
|
| 1065 |
+
alpha:
|
| 1066 |
+
(isRackScope
|
| 1067 |
+
? 0.08 + link.load * 0.24
|
| 1068 |
+
: (0.04 + link.load * 0.12) * localStructurePresence) *
|
| 1069 |
+
linkPresence *
|
| 1070 |
+
emphasis,
|
| 1071 |
+
width: screenStroke(
|
| 1072 |
+
viewport.scale,
|
| 1073 |
+
isRackScope ? 1.6 + link.load * 2 : 0.75 + link.load * 0.9,
|
| 1074 |
+
0.05,
|
| 1075 |
+
2.2,
|
| 1076 |
+
),
|
| 1077 |
+
})
|
| 1078 |
+
}
|
| 1079 |
+
|
| 1080 |
+
visibleLinks.row.forEach(drawStaticLink)
|
| 1081 |
+
visibleLinks.column.forEach(drawStaticLink)
|
| 1082 |
+
visibleLinks.bus.forEach((link) => {
|
| 1083 |
+
if (localStructurePresence < 0.12) {
|
| 1084 |
+
return
|
| 1085 |
+
}
|
| 1086 |
+
|
| 1087 |
+
graphics
|
| 1088 |
+
.moveTo(link.x1, link.y1)
|
| 1089 |
+
.lineTo(link.x2, link.y2)
|
| 1090 |
+
.stroke({
|
| 1091 |
+
color: link.color,
|
| 1092 |
+
alpha: (0.05 + link.load * 0.16) * linkPresence * localStructurePresence,
|
| 1093 |
+
width: screenStroke(viewport.scale, 0.55 + link.load * 0.55, 0.05, 1.1),
|
| 1094 |
+
})
|
| 1095 |
+
})
|
| 1096 |
+
|
| 1097 |
+
for (const node of visibleNodes) {
|
| 1098 |
+
const nodeEmphasis = getEmphasis('node', node.id)
|
| 1099 |
+
if (localStructurePresence >= 0.08) {
|
| 1100 |
+
drawNodeShell(
|
| 1101 |
+
graphics,
|
| 1102 |
+
node,
|
| 1103 |
+
viewport.scale,
|
| 1104 |
+
linkedNodeIds.has(node.id),
|
| 1105 |
+
debugToggles.heat,
|
| 1106 |
+
lodState,
|
| 1107 |
+
nodeEmphasis,
|
| 1108 |
+
)
|
| 1109 |
+
|
| 1110 |
+
graphics.circle(node.hubX, node.hubY, hubRadius).fill({
|
| 1111 |
+
color: linkedNodeIds.has(node.id) ? 0xffcf7a : 0x89f8ea,
|
| 1112 |
+
alpha:
|
| 1113 |
+
((linkedNodeIds.has(node.id) ? 0.68 : 0.08 + node.interNodeLoad * 0.22) *
|
| 1114 |
+
nodeEmphasis *
|
| 1115 |
+
localStructurePresence),
|
| 1116 |
+
})
|
| 1117 |
+
}
|
| 1118 |
+
|
| 1119 |
+
for (const gpu of node.gpus) {
|
| 1120 |
+
const gpuEmphasis = getEmphasis('gpu', gpu.id)
|
| 1121 |
+
const gpuCenterX = gpu.x + gpu.width / 2
|
| 1122 |
+
const connectorStartY =
|
| 1123 |
+
gpu.y + gpu.height / 2 <= node.hubY ? gpu.y + gpu.height : gpu.y
|
| 1124 |
+
const connectorEndY =
|
| 1125 |
+
gpu.y + gpu.height / 2 <= node.hubY ? node.hubY - 4 : node.hubY + 4
|
| 1126 |
+
|
| 1127 |
+
if (localStructurePresence >= 0.08) {
|
| 1128 |
+
graphics
|
| 1129 |
+
.moveTo(gpuCenterX, connectorStartY)
|
| 1130 |
+
.lineTo(gpuCenterX, connectorEndY)
|
| 1131 |
+
.stroke({
|
| 1132 |
+
color: linkedGpuIds.has(gpu.id) ? 0xffd28a : 0x88efe0,
|
| 1133 |
+
alpha:
|
| 1134 |
+
(linkedGpuIds.has(gpu.id)
|
| 1135 |
+
? 0.72
|
| 1136 |
+
: connectorAlpha * (gpu.active ? 0.38 + gpu.linkLoad * 0.34 : 0.12)) * gpuEmphasis,
|
| 1137 |
+
width: screenStroke(
|
| 1138 |
+
viewport.scale,
|
| 1139 |
+
linkedGpuIds.has(gpu.id) ? 1 : gpu.active ? 0.55 + gpu.linkLoad * 0.4 : 0.28,
|
| 1140 |
+
0.03,
|
| 1141 |
+
0.8,
|
| 1142 |
+
),
|
| 1143 |
+
})
|
| 1144 |
+
}
|
| 1145 |
+
|
| 1146 |
+
drawModule(graphics, gpu, viewport.scale, linkedGpuIds.has(gpu.id), lodState, gpuEmphasis)
|
| 1147 |
+
}
|
| 1148 |
+
}
|
| 1149 |
+
|
| 1150 |
+
if (debugToggles.bounds) {
|
| 1151 |
+
for (const pod of visiblePods) {
|
| 1152 |
+
graphics.roundRect(
|
| 1153 |
+
pod.hitBounds.x,
|
| 1154 |
+
pod.hitBounds.y,
|
| 1155 |
+
pod.hitBounds.width,
|
| 1156 |
+
pod.hitBounds.height,
|
| 1157 |
+
screenRadius(viewport.scale, 24, 1.2, 16),
|
| 1158 |
+
).stroke({
|
| 1159 |
+
color: 0xfde6ab,
|
| 1160 |
+
alpha: 0.18,
|
| 1161 |
+
width: screenStroke(viewport.scale, 1, 0.06, 1),
|
| 1162 |
+
})
|
| 1163 |
+
}
|
| 1164 |
+
|
| 1165 |
+
for (const node of visibleNodes) {
|
| 1166 |
+
graphics.roundRect(
|
| 1167 |
+
node.hitBounds.x,
|
| 1168 |
+
node.hitBounds.y,
|
| 1169 |
+
node.hitBounds.width,
|
| 1170 |
+
node.hitBounds.height,
|
| 1171 |
+
screenRadius(viewport.scale, 12, 0.6, 8),
|
| 1172 |
+
).stroke({
|
| 1173 |
+
color: 0xfdf4cc,
|
| 1174 |
+
alpha: 0.34,
|
| 1175 |
+
width: screenStroke(viewport.scale, 1, 0.06, 1),
|
| 1176 |
+
})
|
| 1177 |
+
|
| 1178 |
+
for (const gpu of node.gpus) {
|
| 1179 |
+
graphics.roundRect(
|
| 1180 |
+
gpu.hitBounds.x,
|
| 1181 |
+
gpu.hitBounds.y,
|
| 1182 |
+
gpu.hitBounds.width,
|
| 1183 |
+
gpu.hitBounds.height,
|
| 1184 |
+
screenRadius(viewport.scale, 6, 0.4, 4),
|
| 1185 |
+
).stroke({
|
| 1186 |
+
color: 0x7adfff,
|
| 1187 |
+
alpha: 0.24,
|
| 1188 |
+
width: screenStroke(viewport.scale, 1, 0.06, 1),
|
| 1189 |
+
})
|
| 1190 |
+
}
|
| 1191 |
+
}
|
| 1192 |
+
}
|
| 1193 |
+
|
| 1194 |
+
if (debugToggles.hitAreas) {
|
| 1195 |
+
for (const link of [...visibleLinks.row, ...visibleLinks.column, ...visibleLinks.bus]) {
|
| 1196 |
+
graphics
|
| 1197 |
+
.moveTo(link.x1, link.y1)
|
| 1198 |
+
.lineTo(link.x2, link.y2)
|
| 1199 |
+
.stroke({
|
| 1200 |
+
color: link.kind === 'column' ? 0x60aaf7 : 0xffd08a,
|
| 1201 |
+
alpha: 0.15,
|
| 1202 |
+
width: screenStroke(viewport.scale, link.hitWidth, 0.5, 16),
|
| 1203 |
+
})
|
| 1204 |
+
}
|
| 1205 |
+
}
|
| 1206 |
+
},
|
| 1207 |
+
[
|
| 1208 |
+
debugToggles.bounds,
|
| 1209 |
+
debugToggles.heat,
|
| 1210 |
+
debugToggles.hitAreas,
|
| 1211 |
+
getEmphasis,
|
| 1212 |
+
linkedGpuIds,
|
| 1213 |
+
linkedNodeIds,
|
| 1214 |
+
lodState,
|
| 1215 |
+
model,
|
| 1216 |
+
viewport.scale,
|
| 1217 |
+
visibleLinks.bus,
|
| 1218 |
+
visibleLinks.column,
|
| 1219 |
+
visibleLinks.row,
|
| 1220 |
+
visibleNodes,
|
| 1221 |
+
visiblePods,
|
| 1222 |
+
],
|
| 1223 |
+
)
|
| 1224 |
+
|
| 1225 |
+
// Repaints the dynamic overlay graphics: the pulsing link glow, corner
// brackets around everything matched by the linked focus, and the highlight
// for the pinned (or, failing that, hovered) target.
// `timeMs` drives the pulse phase; in snapshot mode the focus wave is pinned to
// a fixed phase.
const redrawDynamic = useCallback(
  (timeMs: number) => {
    const graphics = dynamicRef.current
    if (!graphics) {
      return
    }

    graphics.clear()
    // A pinned target takes precedence over the hovered one.
    const visibleTarget = pinnedRef.current ?? hoverRef.current
    const linkGlowAlpha = mix(0.08, 0.18, lodState.weights.board)
    const animateLinkGlow =
      lodState.weights.board > 0.14 &&
      visibleLinkCount < 900 &&
      viewport.scale >= 0.28

    const drawGlowLink = (link: (typeof model.rowLinks)[number], index: number, color: number) => {
      const glow = pulse(timeMs, index * 0.19, snapshotMode ? 0 : 0.12)
      graphics
        .moveTo(link.x1, link.y1)
        .lineTo(link.x2, link.y2)
        .stroke({
          color,
          alpha: linkGlowAlpha * (0.12 + link.load * 0.5) * glow,
          width: screenStroke(viewport.scale, 1.2 + link.load * 2.2, 0.08, 3.2),
        })
    }

    if (animateLinkGlow) {
      // The index offsets keep the pulse phase unique across the three link groups.
      const columnOffset = visibleLinks.row.length
      const busOffset = columnOffset + visibleLinks.column.length

      visibleLinks.row.forEach((link, index) => {
        drawGlowLink(link, index, link.color)
      })
      visibleLinks.column.forEach((link, index) => {
        drawGlowLink(link, index + columnOffset, link.color)
      })
      visibleLinks.bus.forEach((link, index) => {
        drawGlowLink(link, index + busOffset, 0x9efef2)
      })
    }

    if (linkedFocus) {
      const pulseTime = snapshotMode ? 0.42 : timeMs / 1000
      const wave = 0.58 + Math.sin(pulseTime * 2.4) * 0.18

      for (const pod of visiblePods) {
        if (linkedPodIds.has(pod.id)) {
          drawCornerFocus(graphics, pod.focusFrame, viewport.scale, 0xffd78e, wave, 18, 4, 2)
        }
      }

      for (const node of visibleNodes) {
        if (linkedNodeIds.has(node.id)) {
          drawCornerFocus(graphics, node.focusFrame, viewport.scale, 0xffd78e, wave, 9, 2, 1.1)
        }
      }

      for (const gpu of visibleGpus) {
        if (linkedGpuIds.has(gpu.id)) {
          drawCornerFocus(graphics, gpu.focusFrame, viewport.scale, 0xffefc3, wave + 0.12, 6, 1, 1)
        }
      }
    }

    if (!visibleTarget) {
      return
    }

    if (visibleTarget.kind === 'pod') {
      const pod = podById.get(visibleTarget.id)
      if (pod) {
        drawCornerFocus(graphics, pod.focusFrame, viewport.scale, 0xf9f5bc, 0.86, 22, 6, 2.3)
      }
      return
    }

    if (visibleTarget.kind === 'node') {
      const node = nodeById.get(visibleTarget.id)
      if (node) {
        drawCornerFocus(graphics, node.focusFrame, viewport.scale, 0xf9f5bc, 0.9, 10, 2, 1.7)
      }
      return
    }

    if (visibleTarget.kind === 'gpu') {
      const gpu = gpuById.get(visibleTarget.id)
      if (gpu) {
        drawCornerFocus(graphics, gpu.focusFrame, viewport.scale, 0xffffff, 0.96, 7, 1.5, 1.3)
      }
      return
    }

    // Remaining target kind is a link. Search the three groups in row, column,
    // bus order without concatenating them: this runs on every repaint, so it
    // should not allocate a copy of every link in the model.
    const link =
      model.rowLinks.find((item) => item.id === visibleTarget.id) ??
      model.columnLinks.find((item) => item.id === visibleTarget.id) ??
      model.busLinks.find((item) => item.id === visibleTarget.id)
    if (!link) {
      return
    }

    graphics
      .moveTo(link.x1, link.y1)
      .lineTo(link.x2, link.y2)
      .stroke({
        color: 0xfef4c8,
        alpha: 0.92,
        width: screenStroke(viewport.scale, 2.6 + link.load * 2.8, 0.14, 4.2),
      })
  },
  [
    gpuById,
    linkedFocus,
    linkedGpuIds,
    linkedNodeIds,
    linkedPodIds,
    lodState.weights.board,
    model,
    nodeById,
    podById,
    snapshotMode,
    visibleGpus,
    visibleLinkCount,
    viewport.scale,
    visibleLinks.bus,
    visibleLinks.column,
    visibleLinks.row,
    visibleNodes,
    visiblePods,
  ],
)
|
| 1368 |
+
|
| 1369 |
+
// Repaint the dynamic layer once whenever the draw routine or any of the
// highlighted targets change, so the overlay is correct even when idle.
useEffect(() => {
  redrawDynamic(0)
}, [redrawDynamic, hoveredTarget, pinnedTarget, linkedFocus])

// Per-frame driver: repaints only while something is animating and reports a
// frame rate roughly every 500 ms of accumulated ticker time.
useTick(
  useCallback(
    (ticker: Ticker) => {
      const glowIsAnimating =
        lodState.weights.board > 0.14 &&
        visibleLinkCount < 900 &&
        viewport.scale >= 0.28
      const hasAnimatedContent = linkedFocus != null || glowIsAnimating

      // Snapshot mode never animates; otherwise skip frames with nothing to pulse.
      if (snapshotMode || !hasAnimatedContent) {
        return
      }

      redrawDynamic(performance.now())

      const stats = statsRef.current
      stats.elapsed += ticker.deltaMS
      stats.frames += 1
      if (stats.elapsed < 500) {
        return
      }

      onFpsChange((stats.frames * 1000) / stats.elapsed)
      stats.elapsed = 0
      stats.frames = 0
    },
    [
      linkedFocus,
      lodState.weights.board,
      onFpsChange,
      redrawDynamic,
      snapshotMode,
      viewport.scale,
      visibleLinkCount,
    ],
  ),
)
|
| 1412 |
+
|
| 1413 |
+
// Id labels are drawn only when debugging is enabled and the "ids" toggle is on.
const debugLabels = debugEnabled && debugToggles.ids

// Rack labels: the active rack gets a fixed caption, others their 1-based index.
const debugPodLabelNodes = debugLabels
  ? visiblePods.map((pod) => {
      const caption = pod.active ? 'ACTIVE RACK' : `R${pod.index + 1}`
      return (
        <pixiText
          key={`pod-label-${pod.id}`}
          x={pod.x + 30}
          y={pod.y + 24}
          text={caption}
          style={{
            fill: 0xdff7f0,
            fontSize: screenWorld(viewport.scale, 18, 3.5, 24) * lodState.textScale,
            fontFamily: 'IBM Plex Mono',
            letterSpacing: screenWorld(viewport.scale, 2, 0.2, 2),
          }}
        />
      )
    })
  : null

// Node labels: 1-based node index.
const debugNodeLabelNodes = debugLabels
  ? visibleNodes.map((node) => (
      <pixiText
        key={`node-label-${node.id}`}
        x={node.x + 10}
        y={node.y + 8}
        text={`N${node.index + 1}`}
        style={{
          fill: 0xdff7f0,
          fontSize: screenWorld(viewport.scale, 8, 2, 10) * lodState.textScale,
          fontFamily: 'IBM Plex Mono',
        }}
      />
    ))
  : null

return (
  <pixiContainer x={viewport.x} y={viewport.y} scale={viewport.scale}>
    <pixiGraphics draw={drawStatic} />
    <pixiGraphics ref={dynamicRef} draw={noopDraw} />

    {debugPodLabelNodes}

    {debugNodeLabelNodes}
  </pixiContainer>
)
|
| 1454 |
+
}
|
| 1455 |
+
|
| 1456 |
+
export function ClusterMap({
|
| 1457 |
+
viewModel,
|
| 1458 |
+
debugEnabled,
|
| 1459 |
+
snapshotMode,
|
| 1460 |
+
linkedFocus,
|
| 1461 |
+
}: ClusterMapProps) {
|
| 1462 |
+
const model = useMemo(() => buildTopologySceneModel(viewModel), [viewModel])
|
| 1463 |
+
const [viewport, setViewport] = useState<ViewportState>({ x: 0, y: 0, scale: 1 })
|
| 1464 |
+
const [surfaceSize, setSurfaceSize] = useState({ width: 0, height: 0 })
|
| 1465 |
+
const [sceneReady, setSceneReady] = useState(false)
|
| 1466 |
+
const [hoveredTarget, setHoveredTarget] = useState<HoverTarget | null>(null)
|
| 1467 |
+
const [pinnedTarget, setPinnedTarget] = useState<HoverTarget | null>(null)
|
| 1468 |
+
const [isDragging, setIsDragging] = useState(false)
|
| 1469 |
+
const [fps, setFps] = useState(0)
|
| 1470 |
+
const [debugToggles, setDebugToggles] = useState<DebugToggles>({
|
| 1471 |
+
bounds: false,
|
| 1472 |
+
ids: false,
|
| 1473 |
+
heat: false,
|
| 1474 |
+
hitAreas: false,
|
| 1475 |
+
stats: true,
|
| 1476 |
+
})
|
| 1477 |
+
const surfaceRef = useRef<HTMLDivElement | null>(null)
|
| 1478 |
+
const interactionLayerRef = useRef<HTMLDivElement | null>(null)
|
| 1479 |
+
const interactionRef = useRef({
|
| 1480 |
+
dragging: false,
|
| 1481 |
+
moved: false,
|
| 1482 |
+
distance: 0,
|
| 1483 |
+
lastPointer: null as ScenePointer | null,
|
| 1484 |
+
pointers: new Map<number, ScenePointer>(),
|
| 1485 |
+
pinchDistance: 0,
|
| 1486 |
+
pinchMidpoint: null as ScenePointer | null,
|
| 1487 |
+
})
|
| 1488 |
+
|
| 1489 |
+
// Ids of every GPU in the model that matches the current linked focus.
const linkedGpuIds = useMemo(() => {
  const allGpus = model.nodes.flatMap((node) => node.gpus)
  const matchingGpus = allGpus.filter((gpu) => matchesLinkedFocus(gpu, linkedFocus))
  const matchingIds = matchingGpus.map((gpu) => gpu.id)

  return new Set(matchingIds)
}, [linkedFocus, model.nodes])
|
| 1497 |
+
|
| 1498 |
+
// Ids of every node that hosts at least one GPU matching the linked focus.
const linkedNodeIds = useMemo(() => {
  const nodesWithMatch = model.nodes.filter((node) =>
    node.gpus.some((gpu) => matchesLinkedFocus(gpu, linkedFocus)),
  )

  return new Set(nodesWithMatch.map((node) => node.id))
}, [linkedFocus, model.nodes])
|
| 1505 |
+
|
| 1506 |
+
// Pod ids (`pod-<domainIndex>`) for nodes with at least one GPU matching the
// linked focus. Empty when there is no linked focus.
const linkedPodIds = useMemo(() => {
  const podIds = new Set<string>()

  if (linkedFocus) {
    for (const node of model.nodes) {
      if (node.gpus.some((gpu) => matchesLinkedFocus(gpu, linkedFocus))) {
        podIds.add(`pod-${node.domainIndex}`)
      }
    }
  }

  return podIds
}, [linkedFocus, model.nodes])
|
| 1517 |
+
|
| 1518 |
+
// Once the surface has a non-zero size, fit the camera to the model on the next
// animation frame and flag the scene as ready one frame after that.
useEffect(() => {
  const surfaceMeasured = surfaceSize.width !== 0 && surfaceSize.height !== 0
  if (!surfaceMeasured) {
    return
  }

  let settleHandle = 0

  const markSceneReady = () => {
    setSceneReady(true)
  }

  const fitThenSettle = () => {
    setViewport(getFitViewport(model, surfaceSize.width, surfaceSize.height))
    settleHandle = requestAnimationFrame(markSceneReady)
  }

  const fitHandle = requestAnimationFrame(fitThenSettle)

  // Cancel whichever frame is still pending when inputs change or on unmount.
  return () => {
    cancelAnimationFrame(fitHandle)
    cancelAnimationFrame(settleHandle)
  }
}, [model, surfaceSize.height, surfaceSize.width])
|
| 1536 |
+
|
| 1537 |
+
// Inspector content for the pinned target, falling back to the hovered one.
const focusedDetails = useMemo<TargetDetails | null>(
  () => describeTarget(model, viewModel, pinnedTarget ?? hoveredTarget),
  [hoveredTarget, model, pinnedTarget, viewModel],
)

// Object map published through the debug handle on `window`.
const debugObjects = useMemo(() => {
  return createDebugObjectMap(model, viewport)
}, [model, viewport])

// Primary level-of-detail band for the current zoom.
const detailLevel = useMemo(() => {
  const lod = getTopologyLodState(viewport.scale)
  return lod.primaryBand
}, [viewport.scale])

// Viewport constraints for the current surface and zoom; null until the
// surface has been measured.
const viewportConstraints = useMemo(
  () =>
    surfaceSize.width === 0 || surfaceSize.height === 0
      ? null
      : getViewportConstraints(model, surfaceSize.width, surfaceSize.height, viewport.scale),
  [model, surfaceSize.height, surfaceSize.width, viewport.scale],
)
|
| 1553 |
+
|
| 1554 |
+
// Publishes scene state on `window.__TOPOLOGY_DEBUG__` while debug or snapshot
// mode is active, and removes the handle otherwise and on cleanup.
useEffect(() => {
  const removeDebugHandle = () => {
    delete window.__TOPOLOGY_DEBUG__
  }

  if (!debugEnabled && !snapshotMode) {
    removeDebugHandle()
    return
  }

  // Lets external callers move the camera; the request is clamped to the scene.
  const applyExternalViewport = (nextViewport: ViewportState) => {
    setViewport(clampViewportToScene(nextViewport, model, surfaceSize.width, surfaceSize.height))
  }

  window.__TOPOLOGY_DEBUG__ = {
    ready: sceneReady,
    viewport,
    surfaceSize,
    objectCounts: model.objectCounts,
    objects: debugObjects,
    hoveredTarget,
    pinnedTarget,
    detailLevel,
    setViewport: applyExternalViewport,
  }

  return removeDebugHandle
}, [
  debugEnabled,
  debugObjects,
  hoveredTarget,
  model.objectCounts,
  pinnedTarget,
  sceneReady,
  snapshotMode,
  surfaceSize,
  detailLevel,
  model,
  viewport,
])
|
| 1590 |
+
|
| 1591 |
+
// Converts client coordinates into coordinates relative to the interaction
// layer's top-left corner. Returns null while the layer is not mounted.
const scenePointerFromClient = useCallback((clientX: number, clientY: number) => {
  const layerRect = interactionLayerRef.current?.getBoundingClientRect()

  return layerRect
    ? {
        x: clientX - layerRect.left,
        y: clientY - layerRect.top,
      }
    : null
}, [])
|
| 1602 |
+
|
| 1603 |
+
// Same conversion as `scenePointerFromClient`, taking the coordinates from a
// pointer event.
const scenePointerFromEvent = useCallback(
  (event: Pick<ReactPointerEvent<HTMLDivElement>, 'clientX' | 'clientY'>) => {
    const { clientX, clientY } = event
    return scenePointerFromClient(clientX, clientY)
  },
  [scenePointerFromClient],
)
|
| 1608 |
+
|
| 1609 |
+
// Maps a layer-relative pointer into world space by undoing the viewport's
// translation and then its scale.
const toWorldPoint = useCallback(
  (pointer: ScenePointer) => {
    const worldX = (pointer.x - viewport.x) / viewport.scale
    const worldY = (pointer.y - viewport.y) / viewport.scale

    return { x: worldX, y: worldY }
  },
  [viewport],
)
|
| 1616 |
+
|
| 1617 |
+
// Viewport setter that accepts a value or an updater function and always runs
// the result through `clampViewportToScene` before storing it.
const setViewportClamped = useCallback(
  (updater: ViewportState | ((current: ViewportState) => ViewportState)) => {
    const resolveRequest = (current: ViewportState) => {
      if (typeof updater === 'function') {
        return (updater as (current: ViewportState) => ViewportState)(current)
      }

      return updater
    }

    setViewport((current) =>
      clampViewportToScene(resolveRequest(current), model, surfaceSize.width, surfaceSize.height),
    )
  },
  [model, surfaceSize.height, surfaceSize.width],
)
|
| 1630 |
+
|
| 1631 |
+
// Multiplies the zoom by `zoomFactor` (within the allowed scale range) while
// keeping the world point under `screenPoint` at the same screen position.
const applyZoomAtPointer = useCallback(
  (screenPoint: ScenePointer, zoomFactor: number) => {
    const lowestScale = viewportConstraints?.minScale ?? MIN_SCALE
    const highestScale = viewportConstraints?.maxScale ?? MAX_SCALE

    setViewportClamped((current) => {
      const scale = clamp(current.scale * zoomFactor, lowestScale, highestScale)
      // World-space position currently under the pointer.
      const anchorX = (screenPoint.x - current.x) / current.scale
      const anchorY = (screenPoint.y - current.y) / current.scale

      return {
        scale,
        x: screenPoint.x - anchorX * scale,
        y: screenPoint.y - anchorY * scale,
      }
    })
  },
  [setViewportClamped, viewportConstraints?.maxScale, viewportConstraints?.minScale],
)
|
| 1648 |
+
|
| 1649 |
+
// Recomputes the hovered target for a pointer position (null clears it). The
// existing state object is kept when kind and id are unchanged, so an
// unchanged hover does not trigger a re-render.
const updateHoverFromPointer = useCallback(
  (pointer: ScenePointer | null) => {
    const worldPoint = pointer ? toWorldPoint(pointer) : null
    const next = worldPoint ? findHoverTarget(model, worldPoint.x, worldPoint.y) : null

    setHoveredTarget((current) => {
      const unchanged = current?.kind === next?.kind && current?.id === next?.id
      return unchanged ? current : next
    })
  },
  [model, toWorldPoint],
)
|
| 1669 |
+
|
| 1670 |
+
// Re-fits the camera to the whole model; does nothing until the surface has a
// non-zero size.
const resetViewport = useCallback(() => {
  const surfaceMeasured = surfaceSize.width !== 0 && surfaceSize.height !== 0

  if (surfaceMeasured) {
    setViewport(getFitViewport(model, surfaceSize.width, surfaceSize.height))
  }
}, [model, surfaceSize.height, surfaceSize.width])
|
| 1677 |
+
|
| 1678 |
+
// Last size accepted by `handleSurfaceSizeChange`; starts at the same 0x0 as
// the `surfaceSize` state so the two stay in step.
const lastSurfaceSizeRef = useRef({ width: 0, height: 0 })

// Records a new surface size and marks the scene as not ready until the
// viewport has been re-fitted.
//
// The readiness flag is only reset when the size actually changes: the effect
// that sets it back to true is keyed on width/height, so resetting it for a
// repeated, identical size report would leave `sceneReady` stuck at false.
const handleSurfaceSizeChange = useCallback((width: number, height: number) => {
  const last = lastSurfaceSizeRef.current
  if (last.width === width && last.height === height) {
    return
  }

  lastSurfaceSizeRef.current = { width, height }
  setSurfaceSize({ width, height })
  setSceneReady(false)
}, [])
|
| 1688 |
+
|
| 1689 |
+
// Attaches a non-passive wheel listener to the interaction layer so wheel
// input zooms the scene instead of scrolling the page.
useEffect(() => {
  const layer = interactionLayerRef.current
  if (!layer) {
    return
  }

  // Wheel events over the inspector or debug panel are left alone.
  const startedInOverlay = (target: EventTarget | null) =>
    target instanceof Element && target.closest('.scene-inspector, .scene-debug-panel') !== null

  const onWheel = (event: WheelEvent) => {
    if (startedInOverlay(event.target)) {
      return
    }

    const pointer = scenePointerFromClient(event.clientX, event.clientY)
    if (!pointer) {
      return
    }

    event.preventDefault()
    event.stopPropagation()

    // Wheel events with ctrlKey set are amplified by 1.8x.
    const speed = event.ctrlKey ? 1.8 : 1
    const delta = event.deltaY * speed
    applyZoomAtPointer(pointer, Math.exp(-delta * 0.0015))
  }

  layer.addEventListener('wheel', onWheel, { passive: false })

  return () => {
    layer.removeEventListener('wheel', onWheel)
  }
}, [applyZoomAtPointer, scenePointerFromClient])
|
| 1718 |
+
|
| 1719 |
+
// Click handling for pinning: clicking a pod/node/GPU pins it, clicking the
// pinned target again unpins it, and clicking a link or empty space clears
// the pin.
const togglePinnedTarget = useCallback(
  (pointer: ScenePointer) => {
    const { x: worldX, y: worldY } = toWorldPoint(pointer)
    const target = findHoverTarget(model, worldX, worldY)

    if (target && target.kind !== 'link') {
      setPinnedTarget((current) => {
        const alreadyPinned = current?.kind === target.kind && current.id === target.id
        return alreadyPinned ? null : target
      })
      return
    }

    setPinnedTarget(null)
  },
  [model, toWorldPoint],
)
|
| 1739 |
+
|
| 1740 |
+
// Starts tracking a pointer on the interaction layer: one pointer begins a
// drag, a second one switches to a pinch gesture. Presses that start on child
// elements (inspector, debug panel) are ignored.
const handlePointerDown = useCallback(
  (event: ReactPointerEvent<HTMLDivElement>) => {
    if (event.target !== event.currentTarget) {
      return
    }

    const pointer = scenePointerFromEvent(event)
    if (!pointer) {
      return
    }

    const interaction = interactionRef.current
    interaction.pointers.set(event.pointerId, pointer)
    interaction.lastPointer = pointer
    // Reset the click-vs-drag bookkeeping for the new press.
    interaction.moved = false
    interaction.distance = 0

    switch (interaction.pointers.size) {
      case 1: {
        interaction.dragging = true
        setIsDragging(true)
        break
      }
      case 2: {
        const [first, second] = Array.from(interaction.pointers.values())
        interaction.dragging = false
        interaction.pinchDistance = Math.hypot(second.x - first.x, second.y - first.y)
        interaction.pinchMidpoint = {
          x: (first.x + second.x) / 2,
          y: (first.y + second.y) / 2,
        }
        setIsDragging(false)
        break
      }
      default:
        break
    }

    event.currentTarget.setPointerCapture(event.pointerId)
  },
  [scenePointerFromEvent],
)
|
| 1777 |
+
|
| 1778 |
+
// Pointer-move handler for the interaction layer. Depending on the tracked
// pointers it performs a two-pointer pinch (zoom plus pan), a single-pointer
// drag (pan), or, when neither gesture is active, a hover update.
const handlePointerMove = useCallback(
  (event: ReactPointerEvent<HTMLDivElement>) => {
    const pointer = scenePointerFromEvent(event)
    if (!pointer) {
      return
    }

    const interaction = interactionRef.current
    if (interaction.pointers.has(event.pointerId)) {
      interaction.pointers.set(event.pointerId, pointer)
    }

    if (interaction.pointers.size === 2) {
      const [first, second] = Array.from(interaction.pointers.values())
      const deltaX = second.x - first.x
      const deltaY = second.y - first.y
      // Clamp to at least 1 so the next zoom factor never divides by zero.
      const distance = Math.max(Math.hypot(deltaX, deltaY), 1)
      const midpoint = {
        x: (first.x + second.x) / 2,
        y: (first.y + second.y) / 2,
      }

      // Capture the previous gesture sample before scheduling the state
      // update. The updater may run later than this handler, by which time the
      // ref below has already been overwritten with the new sample (or nulled
      // by a pointer release); reading the ref from inside the updater would
      // then lose the pan delta or dereference null.
      const previousDistance = interaction.pinchDistance
      const previousMidpoint = interaction.pinchMidpoint

      if (previousDistance > 0 && previousMidpoint) {
        const zoomFactor = distance / previousDistance
        setViewportClamped((current) => {
          const nextScale = clamp(
            current.scale * zoomFactor,
            viewportConstraints?.minScale ?? MIN_SCALE,
            viewportConstraints?.maxScale ?? MAX_SCALE,
          )
          const worldX = (midpoint.x - current.x) / current.scale
          const worldY = (midpoint.y - current.y) / current.scale

          return {
            scale: nextScale,
            // Zoom about the current midpoint, then add the midpoint's movement
            // since the previous sample so the gesture also pans.
            x: midpoint.x - worldX * nextScale + (midpoint.x - previousMidpoint.x),
            y: midpoint.y - worldY * nextScale + (midpoint.y - previousMidpoint.y),
          }
        })
      }

      interaction.pinchDistance = distance
      interaction.pinchMidpoint = midpoint
      interaction.moved = true
      return
    }

    if (interaction.dragging && interaction.lastPointer) {
      const deltaMoveX = pointer.x - interaction.lastPointer.x
      const deltaMoveY = pointer.y - interaction.lastPointer.y
      interaction.lastPointer = pointer
      interaction.distance += Math.abs(deltaMoveX) + Math.abs(deltaMoveY)

      // Small jitter is tolerated so a slightly shaky press still counts as a click.
      if (interaction.distance > 2) {
        interaction.moved = true
      }

      setViewportClamped((current) => ({
        ...current,
        x: current.x + deltaMoveX,
        y: current.y + deltaMoveY,
      }))
      return
    }

    // Moves over child elements (inspector, debug panel) do not update hover.
    if (event.target !== event.currentTarget) {
      return
    }

    updateHoverFromPointer(pointer)
  },
  [
    scenePointerFromEvent,
    setViewportClamped,
    updateHoverFromPointer,
    viewportConstraints?.maxScale,
    viewportConstraints?.minScale,
  ],
)
|
| 1863 |
+
|
| 1864 |
+
// Stops tracking one pointer. Pinch state is cleared once fewer than two
// pointers remain; if exactly none remain the drag ends, otherwise dragging
// continues from a remaining pointer.
const releasePointer = useCallback((pointerId: number) => {
  const interaction = interactionRef.current
  const { pointers } = interaction
  pointers.delete(pointerId)

  const remaining = pointers.size

  if (remaining < 2) {
    interaction.pinchDistance = 0
    interaction.pinchMidpoint = null
  }

  if (remaining > 0) {
    // Re-anchor the drag on a surviving pointer so the view does not jump.
    interaction.lastPointer = Array.from(pointers.values())[0]
    interaction.dragging = true
    return
  }

  interaction.dragging = false
  interaction.lastPointer = null
  setIsDragging(false)
}, [])
|
| 1884 |
+
|
| 1885 |
+
// Ends a pointer interaction: releases capture and tracking, refreshes the
// hover target, and toggles the pin if the press qualified as a click on the
// layer itself.
const handlePointerUp = useCallback(
  (event: ReactPointerEvent<HTMLDivElement>) => {
    const layer = event.currentTarget
    const { pointerId } = event
    const pointer = scenePointerFromEvent(event)
    const interaction = interactionRef.current

    // Decide click-vs-gesture before the pointer is released below, since the
    // release changes the tracked pointer set.
    const wasGesture =
      interaction.moved || interaction.distance >= 8 || interaction.pointers.size > 1

    if (layer.hasPointerCapture(pointerId)) {
      layer.releasePointerCapture(pointerId)
    }

    releasePointer(pointerId)

    if (!pointer) {
      return
    }

    updateHoverFromPointer(pointer)

    if (!wasGesture && event.target === event.currentTarget) {
      togglePinnedTarget(pointer)
    }
  },
  [releasePointer, scenePointerFromEvent, togglePinnedTarget, updateHoverFromPointer],
)
|
| 1909 |
+
|
| 1910 |
+
// Aborts any in-progress gesture and clears the hover target. Wired to both
// pointer-leave and pointer-cancel on the interaction layer.
const handlePointerLeave = useCallback(() => {
  const interaction = interactionRef.current

  interaction.pointers.clear()
  Object.assign(interaction, {
    dragging: false,
    lastPointer: null,
    pinchDistance: 0,
    pinchMidpoint: null,
  })

  setIsDragging(false)
  setHoveredTarget(null)
}, [])
|
| 1919 |
+
|
| 1920 |
+
// Flips a single debug overlay flag, leaving the others unchanged.
const toggleDebugFlag = (key: keyof DebugToggles) => {
  setDebugToggles((current) => {
    const flipped = !current[key]
    return { ...current, [key]: flipped }
  })
}
|
| 1926 |
+
|
| 1927 |
+
const linkedSummary = linkedFocus ? linkedFocus.label : null

// Caption above the inspector: pinned wins over hovered, else a generic title.
let inspectorLabel = 'Topology inspector'
if (pinnedTarget) {
  inspectorLabel = 'Pinned target'
} else if (hoveredTarget) {
  inspectorLabel = 'Hover target'
}

// Note shown in the inspector while a linked focus is active.
const linkNote = linkedSummary ? (
  <p className="inspector-link-note">Transformer highlight: {linkedSummary}</p>
) : null

// Debug overlay checkboxes, in display order.
const debugFlagLabels: Array<[keyof DebugToggles, string]> = [
  ['bounds', 'Bounds'],
  ['ids', 'Node / GPU ids'],
  ['heat', 'Load heat'],
  ['hitAreas', 'Link hit areas'],
  ['stats', 'FPS / counts'],
]

const interactionLayerClass = `topology-interaction-layer${isDragging ? ' is-dragging' : ''}`

return (
  <div className="topology-scene-shell">
    <div className="scene-toolbar">
      <div className="scene-toolbar-actions">
        <button
          type="button"
          className="scene-button"
          onClick={resetViewport}
          data-testid="camera-reset"
        >
          reset camera
        </button>
      </div>
    </div>

    <div
      ref={surfaceRef}
      className="pixi-surface-wrap topology-surface-wrap"
    >
      <PixiSurface
        className="pixi-surface"
        canvasClassName="pixi-canvas"
        testId="topology-scene"
        onSizeChange={handleSurfaceSizeChange}
      >
        {() => (
          <TopologyScene
            model={model}
            viewport={viewport}
            surfaceSize={surfaceSize}
            hoveredTarget={hoveredTarget}
            pinnedTarget={pinnedTarget}
            linkedFocus={linkedFocus}
            linkedGpuIds={linkedGpuIds}
            linkedNodeIds={linkedNodeIds}
            linkedPodIds={linkedPodIds}
            debugEnabled={debugEnabled}
            snapshotMode={snapshotMode}
            debugToggles={debugToggles}
            onFpsChange={setFps}
          />
        )}
      </PixiSurface>

      <div
        ref={interactionLayerRef}
        className={interactionLayerClass}
        data-testid="topology-interaction-layer"
        onPointerDown={handlePointerDown}
        onPointerMove={handlePointerMove}
        onPointerUp={handlePointerUp}
        onPointerCancel={handlePointerLeave}
        onPointerLeave={handlePointerLeave}
        onDoubleClick={(event) => {
          // Only a double-click on the layer itself resets the camera.
          if (event.target === event.currentTarget) {
            resetViewport()
          }
        }}
      >
        <div className="scene-inspector" data-testid="topology-inspector">
          <p className="mini-label">
            {inspectorLabel}
          </p>
          {focusedDetails ? (
            <>
              <h3>{focusedDetails.heading}</h3>
              <p className="inspector-subheading">{focusedDetails.subheading}</p>
              {linkNote}
              <dl className="inspector-grid">
                {focusedDetails.metrics.map((metric) => (
                  <div key={`${focusedDetails.id}-${metric.label}`}>
                    <dt>{metric.label}</dt>
                    <dd>{metric.value}</dd>
                  </div>
                ))}
              </dl>
            </>
          ) : (
            <>
              <h3>Inspect the cluster</h3>
              <p className="inspector-subheading">
                Hover a rack or GPU to inspect placement, memory headroom, and link load.
                Pan and zoom to move between fabric and package detail.
              </p>
              {linkNote}
            </>
          )}
        </div>

        {(debugEnabled || snapshotMode) && (
          <div className="scene-debug-panel" data-testid="topology-debug">
            <p className="mini-label">Debug overlay</p>
            <div className="debug-toggle-grid">
              {debugFlagLabels.map(([flag, label]) => (
                <label key={flag}>
                  <input
                    type="checkbox"
                    checked={debugToggles[flag]}
                    onChange={() => toggleDebugFlag(flag)}
                  />
                  {label}
                </label>
              ))}
            </div>

            {debugToggles.stats ? (
              <div className="debug-stats">
                <span>FPS {snapshotMode ? 'snapshot' : fps.toFixed(0)}</span>
                <span>Racks {model.objectCounts.pods}</span>
                <span>Nodes {model.objectCounts.nodes}</span>
                <span>GPUs {model.objectCounts.gpus}</span>
                <span>Detail {detailLevel}</span>
                <span>Zoom {viewport.scale.toFixed(2)}x</span>
              </div>
            ) : null}
          </div>
        )}
      </div>
    </div>
  </div>
)
|
| 2086 |
+
}
|
src/components/ControlsPanel.tsx
ADDED
|
@@ -0,0 +1,688 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import {
|
| 2 |
+
applyGpuPreset,
|
| 3 |
+
applyExamplePhase,
|
| 4 |
+
applyExamplePreset,
|
| 5 |
+
getExamplePresetOptions,
|
| 6 |
+
getFactorOptions,
|
| 7 |
+
getGpuPresetId,
|
| 8 |
+
getGpuPresetOptions,
|
| 9 |
+
getPhaseOptions,
|
| 10 |
+
type WorkbenchConfig,
|
| 11 |
+
} from '../lib/workbench'
|
| 12 |
+
import { type WorkbenchViewModel } from '../lib/workbenchPresenter'
|
| 13 |
+
|
| 14 |
+
/** Props accepted by the `ControlsPanel` component. */
type ControlsPanelProps = {
  /** Workbench configuration that every control reads its current value from. */
  config: WorkbenchConfig
  /** Called with the complete next configuration whenever a control is edited. */
  onChange: (next: WorkbenchConfig) => void
  /** Called when the "reset" chip in the panel header is clicked. */
  onReset: () => void
  /** Supplies the analysis figures shown in the header (total GPUs, derived DP). */
  viewModel: WorkbenchViewModel
}
|
| 20 |
+
|
| 21 |
+
/** Props for `OptionStrip`, a card that offers a fixed set of numeric choices. */
type OptionStripProps = {
  /** Card heading; also used as the accessible name of the button group. */
  label: string
  /** Short description rendered under the heading. */
  caption: string
  /** Currently selected option; shown in the badge as `{value}x`. */
  value: number
  /** Choices rendered as one chip button each. */
  options: number[]
  /** Called with the option whose chip was clicked. */
  onSelect: (value: number) => void
}
|
| 28 |
+
|
| 29 |
+
/** Props for `SelectField`, a labelled `<select>` element. */
type SelectFieldProps = {
  /** Text rendered next to the select. */
  label: string
  /** `value` of the currently selected option. */
  value: string
  /** Entries rendered as `<option>` elements, in the given order. */
  options: { value: string; label: string }[]
  /** Called with the `value` of the newly chosen option. */
  onChange: (value: string) => void
}
|
| 35 |
+
|
| 36 |
+
/** Props for `NumberField`, a labelled numeric `<input>`. */
type NumberFieldProps = {
  /** Text rendered next to the input. */
  label: string
  /** Number currently displayed in the input. */
  value: number
  /** Lower bound; reported values are never smaller than this. */
  min: number
  /** Increment passed to the input's `step` attribute (the component defaults it to 1). */
  step?: number
  /** Called with the parsed, clamped number after each edit. */
  onChange: (value: number) => void
}
|
| 43 |
+
|
| 44 |
+
/** Props for `ToggleField`, a labelled checkbox. */
type ToggleFieldProps = {
  /** Text rendered after the checkbox. */
  label: string
  /** Whether the checkbox is currently ticked. */
  checked: boolean
  /** Called with the checkbox's new checked state. */
  onChange: (checked: boolean) => void
}
|
| 49 |
+
|
| 50 |
+
/**
 * Card with a heading, a caption, a `{value}x` badge and one chip button per
 * option. The chip equal to `value` gets the extra `active` class; clicking a
 * chip reports that option through `onSelect`.
 */
function OptionStrip({ label, caption, value, options, onSelect }: OptionStripProps) {
  const chips = options.map((candidate) => {
    const chipClass = candidate === value ? 'option-chip active' : 'option-chip'

    return (
      <button
        key={candidate}
        type="button"
        className={chipClass}
        onClick={() => onSelect(candidate)}
      >
        {candidate}
      </button>
    )
  })

  return (
    <div className="control-card">
      <div className="control-card-header">
        <div>
          <h3>{label}</h3>
          <p>{caption}</p>
        </div>
        <span className="control-badge">{value}x</span>
      </div>

      <div className="option-strip" role="group" aria-label={label}>
        {chips}
      </div>
    </div>
  )
}
|
| 76 |
+
|
| 77 |
+
/**
 * Labelled, controlled `<select>`. Renders one `<option>` per entry and
 * forwards the chosen option's value string to `onChange`.
 */
function SelectField({ label, value, options, onChange }: SelectFieldProps) {
  const optionElements = options.map(({ value: optionValue, label: optionLabel }) => (
    <option key={optionValue} value={optionValue}>
      {optionLabel}
    </option>
  ))

  return (
    <label className="control-field">
      <span>{label}</span>
      <select value={value} onChange={(event) => onChange(event.target.value)}>
        {optionElements}
      </select>
    </label>
  )
}
|
| 91 |
+
|
| 92 |
+
/**
 * Labelled, controlled numeric input.
 *
 * Every edit is parsed and reported through `onChange` as a number that is
 * never below `min`:
 * - an empty or unparseable box reports `min`;
 * - any finite number reports `Math.max(min, number)`.
 *
 * @param label    Text rendered next to the input.
 * @param value    Number currently shown.
 * @param min      Lower bound, also passed to the input's `min` attribute.
 * @param step     Input step; defaults to 1.
 * @param onChange Receives the sanitised number.
 */
function NumberField({ label, value, min, step = 1, onChange }: NumberFieldProps) {
  return (
    <label className="control-field">
      <span>{label}</span>
      <input
        type="number"
        min={min}
        step={step}
        value={value}
        onChange={(event) => {
          const raw = event.target.value.trim()
          // An empty box means "no value", not zero: map it to NaN so it takes
          // the fallback branch instead of being parsed as 0 by Number('').
          const parsed = raw === '' ? Number.NaN : Number(raw)
          // Explicit finiteness check instead of `parsed || min`, which also
          // discarded a legitimately typed 0 (wrong whenever `min` is negative).
          onChange(Number.isFinite(parsed) ? Math.max(min, parsed) : min)
        }}
      />
    </label>
  )
}
|
| 106 |
+
|
| 107 |
+
/**
 * Labelled, controlled checkbox. Reports the checkbox's new checked state
 * through `onChange` each time it is toggled.
 */
function ToggleField({ label, checked, onChange }: ToggleFieldProps) {
  const checkbox = (
    <input
      type="checkbox"
      checked={checked}
      onChange={(event) => onChange(event.target.checked)}
    />
  )

  return (
    <label className="control-toggle">
      {checkbox}
      <span>{label}</span>
    </label>
  )
}
|
| 119 |
+
|
| 120 |
+
/**
 * Full control surface of the workbench.
 *
 * Renders five groups of inputs (example preset, model, cluster, training and
 * parallelism). The component holds no state of its own: every edit builds a
 * complete next `WorkbenchConfig` from `config` and hands it to `onChange`.
 * Direct field edits also set `customized: true`; the preset, phase and GPU
 * preset selects delegate to `applyExamplePreset`, `applyExamplePhase` and
 * `applyGpuPreset` instead.
 */
export function ControlsPanel({
  config,
  onChange,
  onReset,
  viewModel,
}: ControlsPanelProps) {
  const totalGPUs = config.cluster.gpusPerNode * config.cluster.numNodes
  // TP and EP choices are computed from the per-node GPU count, PP and CP from
  // the whole cluster.
  const factorOptions = {
    tp: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.tp),
    pp: getFactorOptions(totalGPUs, config.parallelism.pp),
    cp: getFactorOptions(totalGPUs, config.parallelism.cp),
    ep: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.ep),
  }
  const gpuPresetId = getGpuPresetId(config.cluster.gpuType)

  // Option lists for the preset-style selects.
  const examplePresetOptions = getExamplePresetOptions().map(({ id, label }) => ({
    value: id,
    label,
  }))
  const phaseOptions = getPhaseOptions(config.examplePresetId).map(({ id, label }) => ({
    value: id,
    label,
  }))
  const gpuPresetOptions = [
    ...getGpuPresetOptions().map(({ id, label }) => ({ value: id, label })),
    { value: 'custom', label: 'custom GPU' },
  ]

  // --- Section updaters: replace one field and mark the config as customized.

  const updateTraining = <K extends keyof WorkbenchConfig['training']>(
    key: K,
    value: WorkbenchConfig['training'][K],
  ) => {
    const training = { ...config.training, [key]: value }
    onChange({ ...config, customized: true, training })
  }

  const updateModel = <K extends keyof WorkbenchConfig['model']>(
    key: K,
    value: WorkbenchConfig['model'][K],
  ) => {
    const model = { ...config.model, [key]: value }
    onChange({ ...config, customized: true, model })
  }

  const updateCluster = <K extends keyof WorkbenchConfig['cluster']>(
    key: K,
    value: WorkbenchConfig['cluster'][K],
  ) => {
    const cluster = { ...config.cluster, [key]: value }
    onChange({ ...config, customized: true, cluster })
  }

  const updateParallelism = <K extends keyof WorkbenchConfig['parallelism']>(
    key: K,
    value: WorkbenchConfig['parallelism'][K],
  ) => {
    const parallelism = { ...config.parallelism, [key]: value }
    onChange({ ...config, customized: true, parallelism })
  }

  const updateAttentionProfile = (
    nextAttentionProfile: WorkbenchConfig['model']['attentionProfile'],
  ) => {
    updateModel('attentionProfile', nextAttentionProfile)
  }

  const updateMoe = (nextMoe: NonNullable<WorkbenchConfig['model']['moe']> | undefined) => {
    updateModel('moe', nextMoe)
  }

  // Replaces a single field of the GPU spec, keeping the others.
  const updateGpuType = <K extends keyof WorkbenchConfig['cluster']['gpuType']>(
    key: K,
    value: WorkbenchConfig['cluster']['gpuType'][K],
  ) => {
    updateCluster('gpuType', { ...config.cluster.gpuType, [key]: value })
  }

  // --- Select handlers with more than a one-line body.

  // Switching to MoE keeps existing MoE settings or seeds defaults; switching
  // to dense drops them.
  const handleArchitectureChange = (value: string) => {
    const architecture = value as WorkbenchConfig['model']['architecture']
    const moe =
      architecture === 'moe'
        ? (config.model.moe ?? {
            numExperts: 64,
            expertsPerToken: 2,
            numDenseLayers: Math.min(config.model.numLayers, 4),
            expertIntermediateSize: Math.max(1024, config.model.hiddenDim),
            activeParamsPerToken: undefined,
          })
        : undefined

    onChange({
      ...config,
      customized: true,
      model: {
        ...config.model,
        architecture,
        moe,
      },
    })
  }

  // Hybrid attention carries over any previously entered hybrid settings and
  // defaults the sliding window to 4096.
  const handleAttentionTypeChange = (value: string) => {
    if (value === 'full') {
      updateAttentionProfile({ type: 'full' })
      return
    }

    const previous = config.model.attentionProfile
    updateAttentionProfile({
      type: 'hybrid',
      slidingWindowSize: previous?.slidingWindowSize ?? 4096,
      globalAttentionFraction: previous?.globalAttentionFraction,
      globalAttentionEveryN: previous?.globalAttentionEveryN,
    })
  }

  // Picking "custom GPU" is a no-op; any other entry applies that preset.
  const handleGpuPresetChange = (value: string) => {
    if (value !== 'custom') {
      onChange(applyGpuPreset(config, value as Parameters<typeof applyGpuPreset>[1]))
    }
  }

  return (
    <section className="controls-band">
      <div className="controls-head">
        <div>
          <p className="mini-label">Controls</p>
          <h2>Example / model / cluster / training / parallelism</h2>
        </div>

        <div className="controls-meta">
          <span>{viewModel.analysis.totalGPUs.toLocaleString()} GPUs in cluster</span>
          <span>
            {config.parallelism.tp}×{config.parallelism.pp}×{config.parallelism.cp}×
            {config.parallelism.ep} · DP {viewModel.analysis.derivedParallelism.dp}
          </span>
          <button type="button" className="reset-chip" onClick={onReset}>
            reset
          </button>
        </div>
      </div>

      <div className="controls-stack">
        {/* Examples */}
        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Examples</h3>
              <p>Built-in frontier runs as starting points for custom model and cluster edits</p>
            </div>
            {config.customized ? <span className="control-badge">customized</span> : null}
          </div>

          <div className="field-grid">
            <SelectField
              label="Example preset"
              value={config.examplePresetId}
              options={examplePresetOptions}
              onChange={(value) =>
                onChange(applyExamplePreset(config, value as typeof config.examplePresetId))
              }
            />
            <SelectField
              label="Phase"
              value={config.phaseId}
              options={phaseOptions}
              onChange={(value) => onChange(applyExamplePhase(config, value as typeof config.phaseId))}
            />
          </div>
        </div>

        {/* Model */}
        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Model</h3>
              <p>Core architecture dimensions, attention layout, and optional MoE settings</p>
            </div>
          </div>

          <div className="field-grid field-grid-wide">
            <SelectField
              label="Architecture"
              value={config.model.architecture}
              options={[
                { value: 'dense', label: 'dense' },
                { value: 'moe', label: 'moe' },
              ]}
              onChange={handleArchitectureChange}
            />
            <NumberField
              label="Hidden dim"
              value={config.model.hiddenDim}
              min={128}
              step={128}
              onChange={(value) => updateModel('hiddenDim', value)}
            />
            <NumberField
              label="Layers"
              value={config.model.numLayers}
              min={1}
              onChange={(value) => updateModel('numLayers', value)}
            />
            <NumberField
              label="Attention heads"
              value={config.model.numHeads}
              min={1}
              onChange={(value) => updateModel('numHeads', value)}
            />
            <NumberField
              label="KV heads"
              value={config.model.numKVHeads}
              min={1}
              onChange={(value) => updateModel('numKVHeads', value)}
            />
            <NumberField
              label="Vocab size"
              value={config.model.vocabSize}
              min={256}
              onChange={(value) => updateModel('vocabSize', value)}
            />
            <NumberField
              label="Intermediate size"
              value={config.model.intermediateSize}
              min={256}
              onChange={(value) => updateModel('intermediateSize', value)}
            />
            <SelectField
              label="Attention profile"
              value={config.model.attentionProfile?.type ?? 'full'}
              options={[
                { value: 'full', label: 'full attention' },
                { value: 'hybrid', label: 'hybrid attention' },
              ]}
              onChange={handleAttentionTypeChange}
            />
            <div className="control-field control-field-toggle">
              <span>Tied embeddings</span>
              <ToggleField
                label="share output head"
                checked={config.model.tiedEmbeddings}
                onChange={(value) => updateModel('tiedEmbeddings', value)}
              />
            </div>

            {/* Hybrid-attention fields: entering 0 clears the setting (stored as undefined). */}
            {config.model.attentionProfile?.type === 'hybrid' ? (
              <>
                <NumberField
                  label="Sliding window"
                  value={config.model.attentionProfile.slidingWindowSize ?? 4096}
                  min={0}
                  step={256}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...config.model.attentionProfile,
                      type: 'hybrid',
                      slidingWindowSize: value > 0 ? value : undefined,
                    })
                  }
                />
                <NumberField
                  label="Global attention fraction"
                  value={config.model.attentionProfile.globalAttentionFraction ?? 0}
                  min={0}
                  step={0.05}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...config.model.attentionProfile,
                      type: 'hybrid',
                      globalAttentionFraction: value > 0 ? value : undefined,
                    })
                  }
                />
                <NumberField
                  label="Global every N"
                  value={config.model.attentionProfile.globalAttentionEveryN ?? 0}
                  min={0}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...config.model.attentionProfile,
                      type: 'hybrid',
                      globalAttentionEveryN: value > 0 ? value : undefined,
                    })
                  }
                />
              </>
            ) : null}

            {/* MoE fields, shown only for the MoE architecture with MoE settings present. */}
            {config.model.architecture === 'moe' && config.model.moe ? (
              <>
                <NumberField
                  label="Experts"
                  value={config.model.moe.numExperts}
                  min={1}
                  onChange={(value) => updateMoe({ ...config.model.moe!, numExperts: value })}
                />
                <NumberField
                  label="Experts per token"
                  value={config.model.moe.expertsPerToken}
                  min={1}
                  onChange={(value) =>
                    updateMoe({ ...config.model.moe!, expertsPerToken: value })
                  }
                />
                <NumberField
                  label="Dense layers"
                  value={config.model.moe.numDenseLayers}
                  min={0}
                  onChange={(value) => updateMoe({ ...config.model.moe!, numDenseLayers: value })}
                />
                <NumberField
                  label="Expert intermediate"
                  value={config.model.moe.expertIntermediateSize}
                  min={256}
                  onChange={(value) =>
                    updateMoe({ ...config.model.moe!, expertIntermediateSize: value })
                  }
                />
                <NumberField
                  label="Active params / token"
                  value={config.model.moe.activeParamsPerToken ?? 0}
                  min={0}
                  step={1000000}
                  onChange={(value) =>
                    updateMoe({
                      ...config.model.moe!,
                      activeParamsPerToken: value > 0 ? value : undefined,
                    })
                  }
                />
              </>
            ) : null}
          </div>
        </div>

        {/* Cluster */}
        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Cluster</h3>
              <p>GPU spec, node count, and rack-local topology for the physical fabric</p>
            </div>
          </div>

          <div className="field-grid field-grid-wide">
            <SelectField
              label="GPU preset"
              value={gpuPresetId}
              options={gpuPresetOptions}
              onChange={handleGpuPresetChange}
            />
            <label className="control-field">
              <span>GPU name</span>
              <input
                type="text"
                value={config.cluster.gpuType.name}
                onChange={(event) => updateGpuType('name', event.target.value)}
              />
            </label>
            <NumberField
              label="HBM capacity (GB)"
              value={config.cluster.gpuType.hbmCapacityGB}
              min={1}
              onChange={(value) => updateGpuType('hbmCapacityGB', value)}
            />
            <NumberField
              label="Peak BF16 TFLOPs"
              value={config.cluster.gpuType.peakTFLOPsBF16}
              min={1}
              onChange={(value) => updateGpuType('peakTFLOPsBF16', value)}
            />
            <NumberField
              label="HBM bandwidth (TB/s)"
              value={config.cluster.gpuType.memBandwidthTBs}
              min={0.1}
              step={0.05}
              onChange={(value) => updateGpuType('memBandwidthTBs', value)}
            />
            <NumberField
              label="GPUs per node"
              value={config.cluster.gpusPerNode}
              min={1}
              onChange={(value) => updateCluster('gpusPerNode', value)}
            />
            <NumberField
              label="Nodes"
              value={config.cluster.numNodes}
              min={1}
              onChange={(value) => updateCluster('numNodes', value)}
            />
            <NumberField
              label="Nodes per rack"
              value={config.cluster.nodesPerRack ?? config.cluster.numNodes}
              min={1}
              onChange={(value) => updateCluster('nodesPerRack', value)}
            />
            <NumberField
              label="Intra-node bandwidth (GB/s)"
              value={config.cluster.intraNodeBandwidthGBs}
              min={1}
              onChange={(value) => updateCluster('intraNodeBandwidthGBs', value)}
            />
            <NumberField
              label="Inter-node bandwidth (GB/s)"
              value={config.cluster.interNodeBandwidthGBs}
              min={1}
              onChange={(value) => updateCluster('interNodeBandwidthGBs', value)}
            />
          </div>
        </div>

        {/* Training */}
        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Training</h3>
              <p>Batching, precision, optimizer state, and recompute</p>
            </div>
          </div>

          <div className="field-grid field-grid-wide">
            <NumberField
              label="Micro-batch"
              value={config.training.microBatchSize}
              min={1}
              onChange={(value) => updateTraining('microBatchSize', value)}
            />
            <NumberField
              label="Seq length"
              value={config.training.seqLength}
              min={256}
              step={256}
              onChange={(value) => updateTraining('seqLength', value)}
            />
            <NumberField
              label="Grad accum"
              value={config.training.gradAccumSteps}
              min={1}
              onChange={(value) => updateTraining('gradAccumSteps', value)}
            />
            <SelectField
              label="Precision"
              value={config.training.precision}
              options={[
                { value: 'bf16', label: 'bf16' },
                { value: 'fp16', label: 'fp16' },
                { value: 'fp8', label: 'fp8' },
                { value: 'fp32', label: 'fp32' },
              ]}
              onChange={(value) =>
                updateTraining('precision', value as WorkbenchConfig['training']['precision'])
              }
            />
            <SelectField
              label="Optimizer"
              value={config.training.optimizer}
              options={[
                { value: 'adamw', label: 'adamw' },
                { value: 'adam', label: 'adam' },
                { value: 'muon', label: 'muon' },
                { value: 'sgd', label: 'sgd' },
              ]}
              onChange={(value) =>
                updateTraining('optimizer', value as WorkbenchConfig['training']['optimizer'])
              }
            />
            <div className="control-field control-field-toggle">
              <span>Activation recompute</span>
              <ToggleField
                label="checkpointing enabled"
                checked={config.training.activationCheckpointing}
                onChange={(value) => updateTraining('activationCheckpointing', value)}
              />
            </div>
          </div>
        </div>

        {/* Parallelism */}
        <div className="controls-grid controls-grid-parallelism">
          <OptionStrip
            label="TP"
            caption="tensor shards"
            value={config.parallelism.tp}
            options={factorOptions.tp}
            onSelect={(value) => updateParallelism('tp', value)}
          />
          <OptionStrip
            label="PP"
            caption="pipeline stages"
            value={config.parallelism.pp}
            options={factorOptions.pp}
            onSelect={(value) => updateParallelism('pp', value)}
          />
          <OptionStrip
            label="CP"
            caption="context shards"
            value={config.parallelism.cp}
            options={factorOptions.cp}
            onSelect={(value) => updateParallelism('cp', value)}
          />
          <OptionStrip
            label="EP"
            caption="expert lanes"
            value={config.parallelism.ep}
            options={factorOptions.ep}
            onSelect={(value) => updateParallelism('ep', value)}
          />
          <OptionStrip
            label="ZeRO"
            caption="state sharding"
            value={config.parallelism.zeroStage}
            options={[0, 1, 2, 3]}
            onSelect={(value) => updateParallelism('zeroStage', value as 0 | 1 | 2 | 3)}
          />
          <div className="control-card">
            <div className="control-card-header">
              <div>
                <h3>Megatron-style derived DP</h3>
                <p>DP is derived from cluster size and the other parallel axes</p>
              </div>
              <span className="control-badge">{viewModel.analysis.derivedParallelism.dp}x</span>
            </div>
            <div className="field-grid">
              <NumberField
                label="FSDP shard group"
                value={config.parallelism.fsdpShardGroupSize}
                min={0}
                step={8}
                onChange={(value) => updateParallelism('fsdpShardGroupSize', value)}
              />
              <div className="control-field control-field-toggle">
                <span>Distributed optimizer</span>
                <ToggleField
                  label="optimizer state sharding"
                  checked={config.parallelism.distributedOptimizer}
                  onChange={(value) => updateParallelism('distributedOptimizer', value)}
                />
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>
  )
}
|
src/components/pixi/PixiSurface.tsx
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Application } from '@pixi/react'
|
| 2 |
+
import { useEffect, useRef, type ReactNode } from 'react'
|
| 3 |
+
import { useElementSize } from '../../hooks/useElementSize'
|
| 4 |
+
|
| 5 |
+
/** Props accepted by `PixiSurface`. */
type PixiSurfaceProps = {
  /** Class applied to the host `<div>` whose size is measured. */
  className?: string
  /** Class forwarded to the Pixi `<Application>` element. */
  canvasClassName?: string
  /** Value for the host's `data-testid` attribute. */
  testId?: string
  /** Called with the measured size once both dimensions are non-zero. */
  onSizeChange?: (width: number, height: number) => void
  /** Render function for the scene; receives the measured host size. */
  children: (size: { width: number; height: number }) => ReactNode
}
|
| 12 |
+
|
| 13 |
+
/**
 * Host `<div>` that mounts a Pixi `<Application>` sized to itself.
 *
 * The host is measured with `useElementSize`. Until both dimensions are
 * non-zero nothing is mounted inside it; afterwards the application is
 * rendered with `resizeTo` pointing at the host, and `children` is called with
 * the measured size to produce the scene. `onSizeChange`, when provided, is
 * notified from an effect each time a non-zero size is measured.
 */
export function PixiSurface({
  className,
  canvasClassName,
  testId,
  onSizeChange,
  children,
}: PixiSurfaceProps) {
  const hostRef = useRef<HTMLDivElement>(null)
  const size = useElementSize(hostRef)
  const { width, height } = size

  useEffect(() => {
    const unmeasured = width === 0 || height === 0
    if (onSizeChange && !unmeasured) {
      onSizeChange(width, height)
    }
  }, [onSizeChange, height, width])

  let surface: ReactNode = null
  if (width > 0 && height > 0) {
    surface = (
      <Application
        className={canvasClassName}
        resizeTo={hostRef}
        preference="webgl"
        antialias
        autoDensity
        backgroundAlpha={0}
        clearBeforeRender
        sharedTicker={false}
        resolution={1}
      >
        {children(size)}
      </Application>
    )
  }

  return (
    <div ref={hostRef} className={className} data-testid={testId}>
      {surface}
    </div>
  )
}
|
src/hooks/useElementSize.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useEffect, useState, type RefObject } from 'react'
|
| 2 |
+
|
| 3 |
+
/** Size reported by `useElementSize`; `{ width: 0, height: 0 }` until first measured. */
type ElementSize = {
  /** Content-box width, rounded to the nearest integer. */
  width: number
  /** Content-box height, rounded to the nearest integer. */
  height: number
}
|
| 7 |
+
|
| 8 |
+
/**
 * Tracks the rounded content-box size of the element held by `ref`.
 *
 * A `ResizeObserver` is attached on mount and disconnected on cleanup. State
 * is replaced only when the rounded width or height actually changes, so
 * sub-pixel changes do not trigger re-renders.
 *
 * @param ref Ref to the element to observe; nothing is observed while it is empty.
 * @returns The latest measured size, `{ width: 0, height: 0 }` before the first measurement.
 */
export function useElementSize<T extends HTMLElement>(
  ref: RefObject<T | null>,
): ElementSize {
  const [size, setSize] = useState<ElementSize>({ width: 0, height: 0 })

  useEffect(() => {
    const target = ref.current
    if (!target) {
      return
    }

    const handleResize = ([firstEntry]: ResizeObserverEntry[]) => {
      if (!firstEntry) {
        return
      }

      const width = Math.round(firstEntry.contentRect.width)
      const height = Math.round(firstEntry.contentRect.height)

      // Returning the previous object lets React skip the re-render.
      setSize((previous) =>
        previous.width === width && previous.height === height ? previous : { width, height },
      )
    }

    const observer = new ResizeObserver(handleResize)
    observer.observe(target)

    return () => {
      observer.disconnect()
    }
  }, [ref])

  return size
}
|
src/index.css
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Design tokens and base typography. */
:root {
  --font-sans: 'Space Grotesk', sans-serif;
  --font-mono: 'IBM Plex Mono', monospace;
  --ink-strong: #16262d;
  --ink-soft: #526771;
  --ink-muted: #7d8f98;
  --accent-cool: #117a70;
  --accent-warm: #d66225;
  --panel-stroke: rgba(24, 64, 86, 0.08);
  color: var(--ink-strong);
  font-family: var(--font-sans);
  font-synthesis: none;
  font-weight: 400;
  line-height: 1.5;
  text-rendering: optimizeLegibility;
  -moz-osx-font-smoothing: grayscale;
  -webkit-font-smoothing: antialiased;
}

/* Border-box sizing for every element. */
* {
  box-sizing: border-box;
}

/* Page shell: minimum viewport size. */
html {
  min-width: 320px;
  min-height: 100%;
}

/* Page background: two corner glows layered over a vertical gradient. */
body {
  min-width: 320px;
  min-height: 100vh;
  margin: 0;
  background:
    radial-gradient(circle at 0% 0%, rgba(17, 122, 112, 0.08), transparent 24%),
    radial-gradient(circle at 100% 0%, rgba(214, 98, 37, 0.1), transparent 20%),
    linear-gradient(180deg, #f3f0e8 0%, #efebe0 48%, #ebe6d9 100%);
}

/* Form controls inherit the page font. */
button,
input,
select,
textarea {
  font: inherit;
}

button {
  cursor: pointer;
}

/* Links take the colour of the surrounding text. */
a {
  color: inherit;
}

/* Element with id "root" fills at least the viewport height. */
#root {
  min-height: 100vh;
}
|
src/lib/linkedFocus.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * A selection made in another view that should be highlighted on the GPU map.
 *
 * Each numeric coordinate is either a concrete value to match or `null`,
 * meaning "do not constrain this dimension".
 */
export type LinkedFocus = {
  /** View that produced the focus; only 'transformer' exists today. */
  source: 'transformer'
  /** Human-readable label for the focused item. */
  label: string
  /** Stage to match, or null for any. */
  stage: number | null
  /** TP lane to match, or null for any. */
  tpLane: number | null
  /** CP shard to match, or null for any. */
  cpShard: number | null
  /** Sequence band of the selection; `matchesLinkedFocus` does not read it. */
  sequenceBand: number | null
}
|
| 9 |
+
|
| 10 |
+
/** The minimal GPU shape `matchesLinkedFocus` needs in order to test a focus. */
type FocusableGpu = {
  /** Inactive GPUs never match any focus. */
  active: boolean
  stage: number
  tpLane: number
  cpShard: number
}
|
| 16 |
+
|
| 17 |
+
/**
 * Tells whether a GPU falls inside the slice described by a linked focus.
 *
 * @param gpu - GPU coordinates plus its active flag.
 * @param focus - Focus to test against; `null` means nothing is focused.
 * @returns `true` only when a focus exists, the GPU is active, and every
 *   non-null coordinate of the focus (stage, tpLane, cpShard) equals the
 *   GPU's value. `sequenceBand` is not consulted.
 */
export function matchesLinkedFocus(gpu: FocusableGpu, focus: LinkedFocus | null) {
  if (!(focus && gpu.active)) {
    return false
  }

  // A null coordinate on the focus is a wildcard for that dimension.
  const agrees = (wanted: number | null, actual: number) =>
    wanted === null || wanted === actual

  return (
    agrees(focus.stage, gpu.stage) &&
    agrees(focus.tpLane, gpu.tpLane) &&
    agrees(focus.cpShard, gpu.cpShard)
  )
}
|
src/lib/topologyLod.ts
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/** Level-of-detail bands, listed from the most zoomed-out to the most zoomed-in. */
export type TopologyLodBand = 'overview' | 'board' | 'package' | 'silicon' | 'micro'
|
| 7 |
+
|
| 8 |
+
/** One blend weight per level-of-detail band. */
export type TopologyLodWeights = Record<TopologyLodBand, number>
|
| 9 |
+
|
| 10 |
+
/**
 * Zoom limits plus, for every band, the scale thresholds over which the band
 * ramps in and ramps out.
 */
export type TopologyLodPolicy = {
  /** Smallest scale covered by the policy. */
  minScale: number
  /** Largest scale covered by the policy. */
  maxScale: number
  bands: Record<
    TopologyLodBand,
    {
      /** Scale at which the band starts to appear. */
      fadeInStart: number
      /** Scale at which the band is fully faded in. */
      fadeInEnd: number
      /** Scale at which the band starts to disappear. */
      fadeOutStart: number
      /** Scale at which the band is fully faded out. */
      fadeOutEnd: number
    }
  >
}
|
| 23 |
+
|
| 24 |
+
/** Level-of-detail values derived from a single viewport scale. */
export type TopologyLodState = {
  /** The scale this state was computed for. */
  scale: number
  /** The band that currently dominates the blend. */
  primaryBand: TopologyLodBand
  /** Blend weight of every band at this scale. */
  weights: TopologyLodWeights
  /** 0..1 ramp that rises as the view zooms in (see getTopologyLodState). */
  deepIsolation: number
  /** Text size multiplier; shrinks from 1 as the view zooms in. */
  textScale: number
}
|
| 31 |
+
|
| 32 |
+
/** Restricts `value` to the closed range [min, max]. */
const clamp = (value: number, min: number, max: number) => {
  const raisedToFloor = Math.max(value, min)
  return Math.min(raisedToFloor, max)
}
|
| 34 |
+
|
| 35 |
+
/**
 * Default level-of-detail policy.
 *
 * All thresholds are viewport scale values. Consecutive bands overlap: a band
 * starts fading in before the previous one has finished fading out.
 */
export const TOPOLOGY_LOD_POLICY: TopologyLodPolicy = {
  minScale: 0.035,
  maxScale: 420,
  bands: {
    // Starts fading in at minScale.
    overview: {
      fadeInStart: 0.035,
      fadeInEnd: 0.06,
      fadeOutStart: 1.4,
      fadeOutEnd: 3.8,
    },
    board: {
      fadeInStart: 0.12,
      fadeInEnd: 0.55,
      fadeOutStart: 7,
      fadeOutEnd: 18,
    },
    package: {
      fadeInStart: 1.8,
      fadeInEnd: 4.8,
      fadeOutStart: 28,
      fadeOutEnd: 70,
    },
    silicon: {
      fadeInStart: 10,
      fadeInEnd: 24,
      fadeOutStart: 95,
      fadeOutEnd: 220,
    },
    // Equal fade-out bounds at maxScale: bandWeight treats this as "never fades out".
    micro: {
      fadeInStart: 36,
      fadeInEnd: 95,
      fadeOutStart: 420,
      fadeOutEnd: 420,
    },
  },
}
|
| 71 |
+
|
| 72 |
+
/** Clamps a number to the unit interval [0, 1]. */
export const saturate = (value: number) => {
  return clamp(value, 0, 1)
}
|
| 73 |
+
|
| 74 |
+
/**
 * Cubic ease `3t^2 - 2t^3` applied to the input after clamping it to [0, 1].
 *
 * @returns 0 for inputs <= 0, 1 for inputs >= 1, and a smooth S-curve between.
 */
export const smoothstep = (value: number) => {
  const t = saturate(value)
  return t * t * (3 - 2 * t)
}
|
| 78 |
+
|
| 79 |
+
/**
 * Smooth 0..1 ramp of `scale` across the interval [start, end].
 *
 * @param scale - Value being tested.
 * @param start - Scale at which the ramp leaves 0.
 * @param end - Scale at which the ramp reaches 1.
 * @returns The smoothstep-eased position of `scale` within the interval. When
 *   `start === end` the ramp has zero width and becomes a hard step at `end`.
 */
export const fadeBetween = (scale: number, start: number, end: number) => {
  if (start !== end) {
    return smoothstep((scale - start) / (end - start))
  }

  // Zero-width interval: avoid dividing by zero and step instead.
  if (scale >= end) {
    return 1
  }

  return 0
}
|
| 86 |
+
|
| 87 |
+
/**
 * Visibility weight of one band at a given scale: its fade-in ramp multiplied
 * by the inverse of its fade-out ramp, clamped to [0, 1].
 *
 * @param scale - Current viewport scale.
 * @param band - The band's four fade thresholds.
 * @returns Weight in [0, 1]. A band whose `fadeOutStart` equals its
 *   `fadeOutEnd` is treated as never fading out.
 */
export const bandWeight = (
  scale: number,
  band: TopologyLodPolicy['bands'][TopologyLodBand],
) => {
  const rampUp = fadeBetween(scale, band.fadeInStart, band.fadeInEnd)

  let rampDown = 1
  if (band.fadeOutStart !== band.fadeOutEnd) {
    rampDown = 1 - fadeBetween(scale, band.fadeOutStart, band.fadeOutEnd)
  }

  return saturate(rampUp * rampDown)
}
|
| 102 |
+
|
| 103 |
+
/** Bands from most zoomed-out to most zoomed-in; earlier entries win ties. */
const BAND_ORDER: TopologyLodBand[] = ['overview', 'board', 'package', 'silicon', 'micro']
|
| 110 |
+
|
| 111 |
+
/**
 * Computes band weights such that at most two adjacent bands are non-zero and
 * the weights always sum to 1.
 *
 * The scale axis is cut into cross-fade windows taken from each band's
 * `fadeInStart`/`fadeInEnd` in TOPOLOGY_LOD_POLICY. Below a window the coarser
 * band has full weight; inside it the coarser band fades out exactly as the
 * finer band fades in; past the last window `micro` has full weight.
 *
 * @param scale - Current viewport scale.
 * @returns A fresh weights record. A scale that compares false against every
 *   threshold (i.e. NaN) falls through to `micro`, as before.
 */
const createExclusiveWeights = (scale: number): TopologyLodWeights => {
  const weights: TopologyLodWeights = {
    overview: 0,
    board: 0,
    package: 0,
    silicon: 0,
    micro: 0,
  }

  const { bands } = TOPOLOGY_LOD_POLICY

  // Ordered coarse -> fine; each window hands over from `from` to `to`.
  const transitions = [
    {
      from: 'overview' as const,
      to: 'board' as const,
      start: bands.board.fadeInStart,
      end: bands.board.fadeInEnd,
    },
    {
      from: 'board' as const,
      to: 'package' as const,
      start: bands.package.fadeInStart,
      end: bands.package.fadeInEnd,
    },
    {
      from: 'package' as const,
      to: 'silicon' as const,
      start: bands.silicon.fadeInStart,
      end: bands.silicon.fadeInEnd,
    },
    {
      from: 'silicon' as const,
      to: 'micro' as const,
      start: bands.micro.fadeInStart,
      end: bands.micro.fadeInEnd,
    },
  ]

  for (const { from, to, start, end } of transitions) {
    // Before this window: the coarser band owns the view. Because windows are
    // visited in order, this also covers the gap after the previous window.
    if (scale < start) {
      weights[from] = 1
      return weights
    }

    // Inside this window: cross-fade between the two bands.
    if (scale <= end) {
      const alpha = fadeBetween(scale, start, end)
      weights[from] = 1 - alpha
      weights[to] = alpha
      return weights
    }
  }

  // Past the last window.
  weights.micro = 1
  return weights
}
|
| 183 |
+
|
| 184 |
+
/**
 * Derives the complete level-of-detail state for a viewport scale.
 *
 * @param scale - Current viewport scale.
 * @returns The scale, the exclusive band weights, the dominant band, and two
 *   zoom-driven ramps: `deepIsolation` rises from 0 to 1 between scale 22 and
 *   80, and `textScale` falls from 1 to 0.6 between scale 14 and 36.
 */
export const getTopologyLodState = (scale: number): TopologyLodState => {
  const weights = createExclusiveWeights(scale)

  // Prefer the first band (coarsest first) that holds at least half the weight.
  let primaryBand: TopologyLodBand | undefined = BAND_ORDER.find(
    (band) => weights[band] >= 0.5,
  )

  if (primaryBand === undefined) {
    // Otherwise take the heaviest band, defaulting to 'overview'.
    const ranked = Object.entries(weights).sort((left, right) => right[1] - left[1])
    primaryBand = (ranked[0]?.[0] as TopologyLodBand | undefined) ?? 'overview'
  }

  const deepIsolation = fadeBetween(scale, 22, 80)
  const textScale = 1 - fadeBetween(scale, 14, 36) * 0.4

  return { scale, primaryBand, weights, deepIsolation, textScale }
}
|
| 201 |
+
|
| 202 |
+
/**
 * Converts a length in screen pixels to world units at the given scale.
 *
 * @param scale - Current viewport scale; values below 0.001 are treated as 0.001.
 * @param pixels - Desired on-screen length.
 * @param minWorld - Lower bound of the result (default 0.04).
 * @param maxWorld - Upper bound of the result (default 3.5).
 * @returns `pixels / scale`, clamped to [minWorld, maxWorld].
 */
export const screenWorld = (
  scale: number,
  pixels: number,
  minWorld = 0.04,
  maxWorld = 3.5,
) => {
  // Floor the divisor so a zero or tiny scale cannot blow up the result.
  const safeScale = Math.max(scale, 0.001)
  return clamp(pixels / safeScale, minWorld, maxWorld)
}
|
| 208 |
+
|
| 209 |
+
/**
 * Same conversion as `screenWorld`, with a tighter default upper bound (2.8)
 * intended for stroke widths.
 *
 * @param scale - Current viewport scale.
 * @param pixels - Desired on-screen stroke width.
 * @param minWorld - Lower bound of the result (default 0.04).
 * @param maxWorld - Upper bound of the result (default 2.8).
 */
export const screenStroke = (
  scale: number,
  pixels: number,
  minWorld = 0.04,
  maxWorld = 2.8,
) => {
  return screenWorld(scale, pixels, minWorld, maxWorld)
}
|
| 215 |
+
|
| 216 |
+
/**
 * Linear interpolation: returns `from` at alpha 0 and `to` at alpha 1.
 * `alpha` is not clamped, so values outside [0, 1] extrapolate.
 */
export const mix = (from: number, to: number, alpha: number) => {
  const span = to - from
  return from + span * alpha
}
|
src/lib/topologyScene.ts
ADDED
|
@@ -0,0 +1,980 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { type WorkbenchViewModel } from './workbenchPresenter'
|
| 2 |
+
import { TOPOLOGY_LOD_POLICY, type TopologyLodPolicy } from './topologyLod'
|
| 3 |
+
|
| 4 |
+
/** Pan offset and zoom factor of the scene viewport. */
export type ViewportState = {
  x: number
  y: number
  scale: number
}
|
| 9 |
+
|
| 10 |
+
/** Kinds of scene object that can be hovered or selected. */
export type HitTargetKind = 'pod' | 'node' | 'gpu' | 'link'
/** Kinds of link drawn in the scene. */
export type LinkKind = 'row' | 'column' | 'bus'
|
| 12 |
+
|
| 13 |
+
/** Axis-aligned rectangle used for pointer hit-testing. */
export type SceneHitBounds = {
  x: number
  y: number
  width: number
  height: number
}
|
| 19 |
+
|
| 20 |
+
/** Axis-aligned rectangle used as an object's focus or level-of-detail frame. */
export type SceneAnchorFrame = {
  x: number
  y: number
  width: number
  height: number
}
|
| 26 |
+
|
| 27 |
+
/** A rack-sized region of the scene; the builder creates one per rack index. */
export type ScenePod = {
  /** `pod-<index>`. */
  id: string
  kind: 'pod'
  index: number
  x: number
  y: number
  width: number
  height: number
  centerX: number
  centerY: number
  active: boolean
  load: number
  thermal: number
  activeGpus: number
  totalGpus: number
  /** Display title (rack label followed by a 1-based number). */
  title: string
  /** The pod's full rectangle. */
  hitBounds: SceneHitBounds
  /** The pod rectangle inset on every side. */
  focusFrame: SceneAnchorFrame
}
|
| 46 |
+
|
| 47 |
+
/** A rack rectangle together with the ids of the nodes it contains. */
export type SceneRack = {
  id: string
  index: number
  x: number
  y: number
  width: number
  height: number
  load: number
  /** Ids of the nodes placed in this rack. */
  nodeIds: string[]
  hitBounds: SceneHitBounds
  focusFrame: SceneAnchorFrame
}
|
| 59 |
+
|
| 60 |
+
/** One GPU tile: where it sits, which parallelism slots it holds, and how to draw it. */
export type SceneGpu = {
  /** `gpu-<globalIndex>`. */
  id: string
  kind: 'gpu'

  // Placement of the owning node (copied from the node).
  nodeId: string
  nodeIndex: number
  domainIndex: number
  domainLocalIndex: number
  columnIndex: number
  rowIndex: number

  /** Index of the GPU across the whole cluster. */
  globalIndex: number
  /** Index of the GPU within its node. */
  localIndex: number

  // Tile rectangle in scene coordinates.
  x: number
  y: number
  width: number
  height: number

  active: boolean

  // Parallelism coordinates, passed through from the analysis GPU map.
  stage: number
  tpLane: number
  cpShard: number
  epLane: number
  dpReplica: number
  replicaGroup: number
  fsdpRank: number

  /** memoryUsedGB / memoryCapacityGB, clamped to [0, 1]. */
  utilization: number
  linkLoad: number
  memoryUsedGB: number
  memoryCapacityGB: number

  /** Fill color as a 0xRRGGBB number. */
  fillColor: number
  outlineAlpha: number
  title: string

  /** Tile rectangle padded outward to make it easier to hit. */
  hitBounds: SceneHitBounds
  /** Tile rectangle padded slightly outward. */
  focusFrame: SceneAnchorFrame
  /** Exactly the tile rectangle. */
  lodFrame: SceneAnchorFrame
}
|
| 94 |
+
|
| 95 |
+
/** One node (host) rectangle and the GPU tiles laid out inside it. */
export type SceneNode = {
  /** `node-<index>`. */
  id: string
  kind: 'node'
  index: number
  /** Index of the rack that holds this node. */
  domainIndex: number
  /** Index of this node within its rack. */
  domainLocalIndex: number
  /** Column of the node inside its rack grid. */
  columnIndex: number
  /** Row of the node inside its rack grid. */
  rowIndex: number
  x: number
  y: number
  width: number
  height: number
  /** Center of the node rectangle. */
  hubX: number
  hubY: number
  /** Horizontal extent of the node's bus line, inset from both edges. */
  busX1: number
  busX2: number
  /** Number of GPUs in this node with memoryUsedGB > 0. */
  activeCount: number
  localFabricLoad: number
  interNodeLoad: number
  gpus: SceneGpu[]
  /** The node's full rectangle. */
  hitBounds: SceneHitBounds
  /** The node rectangle inset on every side. */
  focusFrame: SceneAnchorFrame
}
|
| 118 |
+
|
| 119 |
+
/** A drawable line segment that carries aggregated traffic. */
export type SceneLink = {
  id: string
  kind: LinkKind
  /** Whether the link joins nodes or racks. */
  scope: 'node' | 'rack'

  // Segment endpoints in scene coordinates.
  x1: number
  y1: number
  x2: number
  y2: number

  load: number
  /** Stroke color as a 0xRRGGBB number. */
  color: number
  /** Drawn stroke width. */
  width: number
  /** Width used for hit-testing. */
  hitWidth: number
  title: string
  trafficType: 'tp' | 'pp' | 'cp' | 'fsdp' | 'ep' | 'dp'
  transport: 'nvlink' | 'infiniband'
  volumeGB: number
}
|
| 136 |
+
|
| 137 |
+
/** Identifies the scene object currently under the pointer. */
export type HoverTarget = {
  kind: HitTargetKind
  id: string
}
|
| 141 |
+
|
| 142 |
+
/** Display-ready description of a scene object: headings plus label/value rows. */
export type TargetDetails = {
  kind: HitTargetKind
  id: string
  heading: string
  subheading: string
  /** Rows shown for the object, in display order. */
  metrics: Array<{ label: string; value: string }>
}
|
| 149 |
+
|
| 150 |
+
/** Everything needed to draw and hit-test the topology scene. */
export type TopologySceneModel = {
  // Overall scene size.
  width: number
  height: number

  // Grid of pods and the size of each pod.
  podColumns: number
  podRows: number
  podWidth: number
  podHeight: number

  // Size of every node rectangle.
  nodeWidth: number
  nodeHeight: number

  activePodId: string
  activePodBounds: SceneHitBounds
  contextualNodeCount: number
  lodPolicy: TopologyLodPolicy

  // Scene objects.
  pods: ScenePod[]
  racks: SceneRack[]
  nodes: SceneNode[]
  rowLinks: SceneLink[]
  columnLinks: SceneLink[]
  busLinks: SceneLink[]

  /** Summary counts of the objects above. */
  objectCounts: {
    pods: number
    nodes: number
    gpus: number
    links: number
    activeGpus: number
    contextualNodes: number
  }
}
|
| 178 |
+
|
| 179 |
+
/** Restricts `value` to the closed range [min, max]. */
const clamp = (value: number, min: number, max: number) => {
  const raisedToFloor = Math.max(value, min)
  return Math.min(raisedToFloor, max)
}
|
| 181 |
+
|
| 182 |
+
/**
 * Tests whether the point (x, y) lies inside a rectangle.
 * All four edges are inclusive.
 */
const pointInBounds = (bounds: SceneHitBounds, x: number, y: number) => {
  const withinX = x >= bounds.x && x <= bounds.x + bounds.width
  const withinY = y >= bounds.y && y <= bounds.y + bounds.height
  return withinX && withinY
}
|
| 187 |
+
|
| 188 |
+
/**
 * Shortest distance from the point (x, y) to the segment (x1, y1)-(x2, y2).
 *
 * The point is projected onto the segment's line and the projection is
 * clamped to the segment, so points beyond an endpoint measure to that
 * endpoint. A zero-length segment is treated as the single point (x1, y1).
 */
const distanceToSegment = (
  x: number,
  y: number,
  x1: number,
  y1: number,
  x2: number,
  y2: number,
) => {
  const segmentX = x2 - x1
  const segmentY = y2 - y1
  const lengthSquared = segmentX * segmentX + segmentY * segmentY

  // Position of the closest point along the segment: 0 at start, 1 at end.
  let t = 0
  if (lengthSquared !== 0) {
    const dot = (x - x1) * segmentX + (y - y1) * segmentY
    t = clamp(dot / lengthSquared, 0, 1)
  }

  const offsetX = x - (x1 + t * segmentX)
  const offsetY = y - (y1 + t * segmentY)

  return Math.sqrt(offsetX * offsetX + offsetY * offsetY)
}
|
| 210 |
+
|
| 211 |
+
/** Formats a fraction as a whole-number percentage, e.g. 0.126 -> "13%". */
const percent = (value: number) => {
  const wholePercent = Math.round(value * 100)
  return `${wholePercent}%`
}
|
| 212 |
+
|
| 213 |
+
/** Fill colors (0xRRGGBB) that stage indexes cycle through. */
const stagePalette = [0x6be5d2, 0xf2b36a, 0x8fbcff, 0xf28ac6, 0xb9e769, 0xc19cff]

/** Stroke color (0xRRGGBB) for each traffic type a link can carry. */
const trafficColorMap: Record<SceneLink['trafficType'], number> = {
  tp: 0x7ce9da,
  pp: 0xf1b067,
  cp: 0x77a8f1,
  fsdp: 0xb0a0ff,
  ep: 0xb9e769,
  dp: 0xf18888,
}

/**
 * Picks the palette color for a stage, wrapping around the palette.
 *
 * @param stageIndex - Integer stage index; may be negative.
 * @returns A 0xRRGGBB color. Negative indexes wrap from the end of the
 *   palette instead of producing `undefined`.
 */
function getStageColor(stageIndex: number) {
  const paletteSize = stagePalette.length
  // `%` keeps the sign of the dividend, so fold negative remainders back into range.
  const slot = ((stageIndex % paletteSize) + paletteSize) % paletteSize
  return stagePalette[slot]
}
|
| 227 |
+
|
| 228 |
+
/**
 * Works out the grid dimensions used to lay out racks, the nodes inside a
 * rack, and the GPUs inside a node.
 *
 * @param cluster - Cluster config; reads `numNodes`, `nodesPerRack` and
 *   `gpusPerNode`.
 * @returns Counts only (no pixel sizes): nodes per rack, rack count and rack
 *   grid, node grid within a rack, and GPU grid within a node.
 *
 * `nodesPerRack` falls back to `numNodes` (a single rack) when it is missing
 * or not positive, and is never less than 1, so the divisions below cannot
 * produce NaN or Infinity for an empty or misconfigured cluster.
 */
function createNodeLayout(cluster: WorkbenchViewModel['config']['cluster']) {
  const nodeCount = cluster.numNodes
  const configuredNodesPerRack = cluster.nodesPerRack
  const nodesPerRack = Math.max(
    1,
    configuredNodesPerRack != null && configuredNodesPerRack > 0
      ? configuredNodesPerRack
      : nodeCount,
  )
  const rackCount = Math.ceil(nodeCount / nodesPerRack)

  // Racks are arranged in a roughly square grid.
  const rackColumns = Math.max(1, Math.ceil(Math.sqrt(rackCount)))
  const rackRows = Math.max(1, Math.ceil(rackCount / rackColumns))

  // The `* 2` makes the node grid wider than it is tall.
  const nodeColumns = Math.max(1, Math.ceil(Math.sqrt(nodesPerRack * 2)))
  const nodeRows = Math.max(1, Math.ceil(nodesPerRack / nodeColumns))

  // Up to 4 GPUs use two columns; larger nodes use four.
  const gpuColumns = cluster.gpusPerNode <= 4 ? 2 : 4
  const gpuRows = Math.ceil(cluster.gpusPerNode / gpuColumns)

  return {
    nodesPerRack,
    rackCount,
    rackColumns,
    rackRows,
    nodeColumns,
    nodeRows,
    gpuColumns,
    gpuRows,
  }
}
|
| 250 |
+
|
| 251 |
+
export function buildTopologySceneModel(viewModel: WorkbenchViewModel): TopologySceneModel {
|
| 252 |
+
const { config, analysis } = viewModel
|
| 253 |
+
const cluster = config.cluster
|
| 254 |
+
const clusterPaddingX = 72
|
| 255 |
+
const clusterPaddingY = 72
|
| 256 |
+
const rackGapX = 56
|
| 257 |
+
const rackGapY = 58
|
| 258 |
+
const rackPaddingX = 20
|
| 259 |
+
const rackPaddingY = 24
|
| 260 |
+
const nodeGapX = 12
|
| 261 |
+
const nodeGapY = 14
|
| 262 |
+
const layout = createNodeLayout(cluster)
|
| 263 |
+
const gpuWidth = 14
|
| 264 |
+
const gpuHeight = 10
|
| 265 |
+
const gpuGapX = 6
|
| 266 |
+
const gpuGapY = 6
|
| 267 |
+
const nodeWidth =
|
| 268 |
+
28 + layout.gpuColumns * gpuWidth + Math.max(layout.gpuColumns - 1, 0) * gpuGapX
|
| 269 |
+
const nodeHeight =
|
| 270 |
+
26 + layout.gpuRows * gpuHeight + Math.max(layout.gpuRows - 1, 0) * gpuGapY
|
| 271 |
+
const rackWidth =
|
| 272 |
+
rackPaddingX * 2 +
|
| 273 |
+
layout.nodeColumns * nodeWidth +
|
| 274 |
+
Math.max(layout.nodeColumns - 1, 0) * nodeGapX
|
| 275 |
+
const rackHeight =
|
| 276 |
+
rackPaddingY * 2 +
|
| 277 |
+
layout.nodeRows * nodeHeight +
|
| 278 |
+
Math.max(layout.nodeRows - 1, 0) * nodeGapY
|
| 279 |
+
const width =
|
| 280 |
+
clusterPaddingX * 2 +
|
| 281 |
+
layout.rackColumns * rackWidth +
|
| 282 |
+
Math.max(layout.rackColumns - 1, 0) * rackGapX
|
| 283 |
+
const height =
|
| 284 |
+
clusterPaddingY * 2 +
|
| 285 |
+
layout.rackRows * rackHeight +
|
| 286 |
+
Math.max(layout.rackRows - 1, 0) * rackGapY
|
| 287 |
+
|
| 288 |
+
const pods: ScenePod[] = Array.from({ length: layout.rackCount }, (_, rackIndex) => {
|
| 289 |
+
const column = rackIndex % layout.rackColumns
|
| 290 |
+
const row = Math.floor(rackIndex / layout.rackColumns)
|
| 291 |
+
const x = clusterPaddingX + column * (rackWidth + rackGapX)
|
| 292 |
+
const y = clusterPaddingY + row * (rackHeight + rackGapY)
|
| 293 |
+
|
| 294 |
+
return {
|
| 295 |
+
id: `pod-${rackIndex}`,
|
| 296 |
+
kind: 'pod',
|
| 297 |
+
index: rackIndex,
|
| 298 |
+
x,
|
| 299 |
+
y,
|
| 300 |
+
width: rackWidth,
|
| 301 |
+
height: rackHeight,
|
| 302 |
+
centerX: x + rackWidth / 2,
|
| 303 |
+
centerY: y + rackHeight / 2,
|
| 304 |
+
active: false,
|
| 305 |
+
load: 0,
|
| 306 |
+
thermal: 0,
|
| 307 |
+
activeGpus: 0,
|
| 308 |
+
totalGpus: 0,
|
| 309 |
+
title: `${cluster.rackLabel ?? 'rack'} ${rackIndex + 1}`,
|
| 310 |
+
hitBounds: {
|
| 311 |
+
x,
|
| 312 |
+
y,
|
| 313 |
+
width: rackWidth,
|
| 314 |
+
height: rackHeight,
|
| 315 |
+
},
|
| 316 |
+
focusFrame: {
|
| 317 |
+
x: x + 5,
|
| 318 |
+
y: y + 5,
|
| 319 |
+
width: rackWidth - 10,
|
| 320 |
+
height: rackHeight - 10,
|
| 321 |
+
},
|
| 322 |
+
}
|
| 323 |
+
})
|
| 324 |
+
|
| 325 |
+
const nodeByIndex = new Map<number, SceneNode>()
|
| 326 |
+
const gpuByGlobalIndex = new Map<number, SceneGpu>()
|
| 327 |
+
const rawGpuMap = [...analysis.gpuMap].sort((left, right) => left.globalGPUIndex - right.globalGPUIndex)
|
| 328 |
+
const rawGpuByGlobalIndex = new Map(
|
| 329 |
+
rawGpuMap.map((gpu) => [gpu.globalGPUIndex, gpu] as const),
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
for (let nodeIndex = 0; nodeIndex < cluster.numNodes; nodeIndex += 1) {
|
| 333 |
+
const domainIndex = Math.floor(nodeIndex / layout.nodesPerRack)
|
| 334 |
+
const domainLocalIndex = nodeIndex % layout.nodesPerRack
|
| 335 |
+
const columnIndex = domainLocalIndex % layout.nodeColumns
|
| 336 |
+
const rowIndex = Math.floor(domainLocalIndex / layout.nodeColumns)
|
| 337 |
+
const rack = pods[domainIndex]
|
| 338 |
+
const x = rack.x + rackPaddingX + columnIndex * (nodeWidth + nodeGapX)
|
| 339 |
+
const y = rack.y + rackPaddingY + rowIndex * (nodeHeight + nodeGapY)
|
| 340 |
+
const hubX = x + nodeWidth / 2
|
| 341 |
+
const hubY = y + nodeHeight / 2
|
| 342 |
+
const gpuLeftInset =
|
| 343 |
+
(nodeWidth -
|
| 344 |
+
(layout.gpuColumns * gpuWidth + Math.max(layout.gpuColumns - 1, 0) * gpuGapX)) /
|
| 345 |
+
2
|
| 346 |
+
const gpuTopInset =
|
| 347 |
+
(nodeHeight -
|
| 348 |
+
(layout.gpuRows * gpuHeight + Math.max(layout.gpuRows - 1, 0) * gpuGapY)) /
|
| 349 |
+
2
|
| 350 |
+
const nodeGpuMap = rawGpuMap
|
| 351 |
+
.filter((gpu) => gpu.nodeIndex === nodeIndex)
|
| 352 |
+
.sort((left, right) => left.localGPUIndex - right.localGPUIndex)
|
| 353 |
+
|
| 354 |
+
const gpus: SceneGpu[] = nodeGpuMap.map((gpu) => {
|
| 355 |
+
const localColumn = gpu.localGPUIndex % layout.gpuColumns
|
| 356 |
+
const localRow = Math.floor(gpu.localGPUIndex / layout.gpuColumns)
|
| 357 |
+
const gpuX = x + gpuLeftInset + localColumn * (gpuWidth + gpuGapX)
|
| 358 |
+
const gpuY = y + gpuTopInset + localRow * (gpuHeight + gpuGapY)
|
| 359 |
+
const memoryUtilization = gpu.memoryCapacityGB > 0 ? gpu.memoryUsedGB / gpu.memoryCapacityGB : 0
|
| 360 |
+
const fillColor = gpu.memoryUsedGB > 0 ? getStageColor(Math.max(gpu.ppStage, 0)) : 0x26404d
|
| 361 |
+
const outlineAlpha = gpu.memoryUsedGB > 0 ? 0.3 + memoryUtilization * 0.4 : 0.14
|
| 362 |
+
|
| 363 |
+
const sceneGpu = {
|
| 364 |
+
id: `gpu-${gpu.globalGPUIndex}`,
|
| 365 |
+
kind: 'gpu' as const,
|
| 366 |
+
nodeId: `node-${nodeIndex}`,
|
| 367 |
+
nodeIndex,
|
| 368 |
+
domainIndex,
|
| 369 |
+
domainLocalIndex,
|
| 370 |
+
columnIndex,
|
| 371 |
+
rowIndex,
|
| 372 |
+
globalIndex: gpu.globalGPUIndex,
|
| 373 |
+
localIndex: gpu.localGPUIndex,
|
| 374 |
+
x: gpuX,
|
| 375 |
+
y: gpuY,
|
| 376 |
+
width: gpuWidth,
|
| 377 |
+
height: gpuHeight,
|
| 378 |
+
active: gpu.isActive,
|
| 379 |
+
stage: gpu.ppStage,
|
| 380 |
+
tpLane: gpu.tpLane,
|
| 381 |
+
cpShard: gpu.cpShard,
|
| 382 |
+
epLane: gpu.epLane,
|
| 383 |
+
dpReplica: gpu.dpReplica,
|
| 384 |
+
replicaGroup: gpu.replicaGroup,
|
| 385 |
+
fsdpRank: gpu.fsdpRank,
|
| 386 |
+
utilization: clamp(memoryUtilization, 0, 1),
|
| 387 |
+
linkLoad: 0,
|
| 388 |
+
memoryUsedGB: gpu.memoryUsedGB,
|
| 389 |
+
memoryCapacityGB: gpu.memoryCapacityGB,
|
| 390 |
+
fillColor,
|
| 391 |
+
outlineAlpha,
|
| 392 |
+
title: `GPU ${gpu.globalGPUIndex + 1}`,
|
| 393 |
+
hitBounds: {
|
| 394 |
+
x: gpuX - 3,
|
| 395 |
+
y: gpuY - 3,
|
| 396 |
+
width: gpuWidth + 6,
|
| 397 |
+
height: gpuHeight + 6,
|
| 398 |
+
},
|
| 399 |
+
focusFrame: {
|
| 400 |
+
x: gpuX - 0.8,
|
| 401 |
+
y: gpuY - 0.8,
|
| 402 |
+
width: gpuWidth + 1.6,
|
| 403 |
+
height: gpuHeight + 1.6,
|
| 404 |
+
},
|
| 405 |
+
lodFrame: {
|
| 406 |
+
x: gpuX,
|
| 407 |
+
y: gpuY,
|
| 408 |
+
width: gpuWidth,
|
| 409 |
+
height: gpuHeight,
|
| 410 |
+
},
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
gpuByGlobalIndex.set(gpu.globalGPUIndex, sceneGpu)
|
| 414 |
+
return sceneGpu
|
| 415 |
+
})
|
| 416 |
+
|
| 417 |
+
const node = {
|
| 418 |
+
id: `node-${nodeIndex}`,
|
| 419 |
+
kind: 'node' as const,
|
| 420 |
+
index: nodeIndex,
|
| 421 |
+
domainIndex,
|
| 422 |
+
domainLocalIndex,
|
| 423 |
+
columnIndex,
|
| 424 |
+
rowIndex,
|
| 425 |
+
x,
|
| 426 |
+
y,
|
| 427 |
+
width: nodeWidth,
|
| 428 |
+
height: nodeHeight,
|
| 429 |
+
hubX,
|
| 430 |
+
hubY,
|
| 431 |
+
busX1: x + 8,
|
| 432 |
+
busX2: x + nodeWidth - 8,
|
| 433 |
+
activeCount: gpus.filter((gpu) => gpu.memoryUsedGB > 0).length,
|
| 434 |
+
localFabricLoad: 0,
|
| 435 |
+
interNodeLoad: 0,
|
| 436 |
+
gpus,
|
| 437 |
+
hitBounds: {
|
| 438 |
+
x,
|
| 439 |
+
y,
|
| 440 |
+
width: nodeWidth,
|
| 441 |
+
height: nodeHeight,
|
| 442 |
+
},
|
| 443 |
+
focusFrame: {
|
| 444 |
+
x: x + 2,
|
| 445 |
+
y: y + 2,
|
| 446 |
+
width: nodeWidth - 4,
|
| 447 |
+
height: nodeHeight - 4,
|
| 448 |
+
},
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
nodeByIndex.set(nodeIndex, node)
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
const aggregateLinks = new Map<
|
| 455 |
+
string,
|
| 456 |
+
{
|
| 457 |
+
fromNode: number
|
| 458 |
+
toNode: number
|
| 459 |
+
trafficType: SceneLink['trafficType']
|
| 460 |
+
transport: SceneLink['transport']
|
| 461 |
+
loadSum: number
|
| 462 |
+
volumeSum: number
|
| 463 |
+
count: number
|
| 464 |
+
}
|
| 465 |
+
>()
|
| 466 |
+
const aggregateRackLinks = new Map<
|
| 467 |
+
string,
|
| 468 |
+
{
|
| 469 |
+
fromRack: number
|
| 470 |
+
toRack: number
|
| 471 |
+
trafficType: SceneLink['trafficType']
|
| 472 |
+
transport: SceneLink['transport']
|
| 473 |
+
loadSum: number
|
| 474 |
+
volumeSum: number
|
| 475 |
+
count: number
|
| 476 |
+
}
|
| 477 |
+
>()
|
| 478 |
+
|
| 479 |
+
const addAggregate = (
|
| 480 |
+
fromNode: number,
|
| 481 |
+
toNode: number,
|
| 482 |
+
trafficType: SceneLink['trafficType'],
|
| 483 |
+
transport: SceneLink['transport'],
|
| 484 |
+
utilizationPercent: number,
|
| 485 |
+
volumeGB: number,
|
| 486 |
+
) => {
|
| 487 |
+
const ordered =
|
| 488 |
+
fromNode <= toNode ? [fromNode, toNode] as const : [toNode, fromNode] as const
|
| 489 |
+
const key = `${ordered[0]}:${ordered[1]}:${trafficType}:${transport}`
|
| 490 |
+
const current = aggregateLinks.get(key) ?? {
|
| 491 |
+
fromNode: ordered[0],
|
| 492 |
+
toNode: ordered[1],
|
| 493 |
+
trafficType,
|
| 494 |
+
transport,
|
| 495 |
+
loadSum: 0,
|
| 496 |
+
volumeSum: 0,
|
| 497 |
+
count: 0,
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
current.loadSum += utilizationPercent / 100
|
| 501 |
+
current.volumeSum += volumeGB
|
| 502 |
+
current.count += 1
|
| 503 |
+
aggregateLinks.set(key, current)
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
const addRackAggregate = (
|
| 507 |
+
fromRack: number,
|
| 508 |
+
toRack: number,
|
| 509 |
+
trafficType: SceneLink['trafficType'],
|
| 510 |
+
transport: SceneLink['transport'],
|
| 511 |
+
utilizationPercent: number,
|
| 512 |
+
volumeGB: number,
|
| 513 |
+
) => {
|
| 514 |
+
const ordered =
|
| 515 |
+
fromRack <= toRack ? ([fromRack, toRack] as const) : ([toRack, fromRack] as const)
|
| 516 |
+
const key = `${ordered[0]}:${ordered[1]}:${trafficType}:${transport}`
|
| 517 |
+
const current = aggregateRackLinks.get(key) ?? {
|
| 518 |
+
fromRack: ordered[0],
|
| 519 |
+
toRack: ordered[1],
|
| 520 |
+
trafficType,
|
| 521 |
+
transport,
|
| 522 |
+
loadSum: 0,
|
| 523 |
+
volumeSum: 0,
|
| 524 |
+
count: 0,
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
current.loadSum += utilizationPercent / 100
|
| 528 |
+
current.volumeSum += volumeGB
|
| 529 |
+
current.count += 1
|
| 530 |
+
aggregateRackLinks.set(key, current)
|
| 531 |
+
}
|
| 532 |
+
|
| 533 |
+
const pushLoad = (target: Map<number, number[]>, key: number, value: number) => {
|
| 534 |
+
target.set(key, [...(target.get(key) ?? []), value])
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
for (const link of analysis.links) {
|
| 538 |
+
const fromGpu = rawGpuByGlobalIndex.get(link.fromGPU)
|
| 539 |
+
const toGpu = rawGpuByGlobalIndex.get(link.toGPU)
|
| 540 |
+
|
| 541 |
+
if (!fromGpu || !toGpu) {
|
| 542 |
+
continue
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
+
const fromRack = Math.floor(fromGpu.nodeIndex / layout.nodesPerRack)
|
| 546 |
+
const toRack = Math.floor(toGpu.nodeIndex / layout.nodesPerRack)
|
| 547 |
+
|
| 548 |
+
if (fromRack !== toRack) {
|
| 549 |
+
addRackAggregate(
|
| 550 |
+
fromRack,
|
| 551 |
+
toRack,
|
| 552 |
+
link.trafficType,
|
| 553 |
+
link.type,
|
| 554 |
+
link.utilizationPercent,
|
| 555 |
+
link.volumeGB,
|
| 556 |
+
)
|
| 557 |
+
continue
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
addAggregate(
|
| 561 |
+
fromGpu.nodeIndex,
|
| 562 |
+
toGpu.nodeIndex,
|
| 563 |
+
link.trafficType,
|
| 564 |
+
link.type,
|
| 565 |
+
link.utilizationPercent,
|
| 566 |
+
link.volumeGB,
|
| 567 |
+
)
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
const rowLinks: SceneLink[] = []
|
| 571 |
+
const columnLinks: SceneLink[] = []
|
| 572 |
+
const busLinks: SceneLink[] = []
|
| 573 |
+
const localLoads = new Map<number, number[]>()
|
| 574 |
+
const interLoads = new Map<number, number[]>()
|
| 575 |
+
|
| 576 |
+
for (const aggregate of aggregateLinks.values()) {
|
| 577 |
+
const averageLoad = aggregate.count > 0 ? aggregate.loadSum / aggregate.count : 0
|
| 578 |
+
const averageVolume = aggregate.count > 0 ? aggregate.volumeSum / aggregate.count : 0
|
| 579 |
+
const color = trafficColorMap[aggregate.trafficType]
|
| 580 |
+
|
| 581 |
+
if (aggregate.fromNode === aggregate.toNode) {
|
| 582 |
+
const node = nodeByIndex.get(aggregate.fromNode)
|
| 583 |
+
if (!node) {
|
| 584 |
+
continue
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
const offset = busLinks.filter((link) => link.id.startsWith(`bus-${node.index}`)).length * 3
|
| 588 |
+
busLinks.push({
|
| 589 |
+
id: `bus-${node.index}-${aggregate.trafficType}`,
|
| 590 |
+
kind: 'bus',
|
| 591 |
+
scope: 'node',
|
| 592 |
+
x1: node.busX1,
|
| 593 |
+
y1: node.hubY + offset,
|
| 594 |
+
x2: node.busX2,
|
| 595 |
+
y2: node.hubY + offset,
|
| 596 |
+
load: averageLoad,
|
| 597 |
+
color,
|
| 598 |
+
width: 0.9 + averageLoad * 2,
|
| 599 |
+
hitWidth: 10,
|
| 600 |
+
title: `${aggregate.trafficType.toUpperCase()} ${aggregate.transport} on ${cluster.nodeLabel ?? 'node'} ${node.index + 1}`,
|
| 601 |
+
trafficType: aggregate.trafficType,
|
| 602 |
+
transport: aggregate.transport,
|
| 603 |
+
volumeGB: roundVolume(averageVolume),
|
| 604 |
+
})
|
| 605 |
+
pushLoad(localLoads, node.index, averageLoad)
|
| 606 |
+
continue
|
| 607 |
+
}
|
| 608 |
+
|
| 609 |
+
const fromNode = nodeByIndex.get(aggregate.fromNode)
|
| 610 |
+
const toNode = nodeByIndex.get(aggregate.toNode)
|
| 611 |
+
if (!fromNode || !toNode) {
|
| 612 |
+
continue
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
const sceneLink = {
|
| 616 |
+
id: `link-${aggregate.fromNode}-${aggregate.toNode}-${aggregate.trafficType}`,
|
| 617 |
+
kind:
|
| 618 |
+
Math.abs(fromNode.hubX - toNode.hubX) >= Math.abs(fromNode.hubY - toNode.hubY)
|
| 619 |
+
? ('row' as const)
|
| 620 |
+
: ('column' as const),
|
| 621 |
+
scope: 'node' as const,
|
| 622 |
+
x1: fromNode.hubX,
|
| 623 |
+
y1: fromNode.hubY,
|
| 624 |
+
x2: toNode.hubX,
|
| 625 |
+
y2: toNode.hubY,
|
| 626 |
+
load: averageLoad,
|
| 627 |
+
color,
|
| 628 |
+
width: 1 + averageLoad * 2.6,
|
| 629 |
+
hitWidth: aggregate.transport === 'infiniband' ? 18 : 12,
|
| 630 |
+
title:
|
| 631 |
+
`${aggregate.trafficType.toUpperCase()} ${aggregate.transport} between ` +
|
| 632 |
+
`${cluster.nodeLabel ?? 'node'} ${fromNode.index + 1} and ${cluster.nodeLabel ?? 'node'} ${toNode.index + 1}`,
|
| 633 |
+
trafficType: aggregate.trafficType,
|
| 634 |
+
transport: aggregate.transport,
|
| 635 |
+
volumeGB: roundVolume(averageVolume),
|
| 636 |
+
}
|
| 637 |
+
|
| 638 |
+
if (sceneLink.kind === 'row') {
|
| 639 |
+
rowLinks.push(sceneLink)
|
| 640 |
+
} else {
|
| 641 |
+
columnLinks.push(sceneLink)
|
| 642 |
+
}
|
| 643 |
+
|
| 644 |
+
const targetMap = aggregate.transport === 'nvlink' ? localLoads : interLoads
|
| 645 |
+
pushLoad(targetMap, fromNode.index, averageLoad)
|
| 646 |
+
pushLoad(targetMap, toNode.index, averageLoad)
|
| 647 |
+
}
|
| 648 |
+
|
| 649 |
+
for (const aggregate of aggregateRackLinks.values()) {
|
| 650 |
+
const averageLoad = aggregate.count > 0 ? aggregate.loadSum / aggregate.count : 0
|
| 651 |
+
const averageVolume = aggregate.count > 0 ? aggregate.volumeSum / aggregate.count : 0
|
| 652 |
+
const color = trafficColorMap[aggregate.trafficType]
|
| 653 |
+
const fromRack = pods[aggregate.fromRack]
|
| 654 |
+
const toRack = pods[aggregate.toRack]
|
| 655 |
+
|
| 656 |
+
if (!fromRack || !toRack) {
|
| 657 |
+
continue
|
| 658 |
+
}
|
| 659 |
+
|
| 660 |
+
const sceneLink = {
|
| 661 |
+
id: `rack-link-${aggregate.fromRack}-${aggregate.toRack}-${aggregate.trafficType}`,
|
| 662 |
+
kind:
|
| 663 |
+
Math.abs(fromRack.centerX - toRack.centerX) >= Math.abs(fromRack.centerY - toRack.centerY)
|
| 664 |
+
? ('row' as const)
|
| 665 |
+
: ('column' as const),
|
| 666 |
+
scope: 'rack' as const,
|
| 667 |
+
x1: fromRack.centerX,
|
| 668 |
+
y1: fromRack.centerY,
|
| 669 |
+
x2: toRack.centerX,
|
| 670 |
+
y2: toRack.centerY,
|
| 671 |
+
load: averageLoad,
|
| 672 |
+
color,
|
| 673 |
+
width: 1.6 + averageLoad * 3.2,
|
| 674 |
+
hitWidth: 22,
|
| 675 |
+
title:
|
| 676 |
+
`${aggregate.trafficType.toUpperCase()} ${aggregate.transport} between ` +
|
| 677 |
+
`${cluster.rackLabel ?? 'rack'} ${aggregate.fromRack + 1} and ${cluster.rackLabel ?? 'rack'} ${aggregate.toRack + 1}`,
|
| 678 |
+
trafficType: aggregate.trafficType,
|
| 679 |
+
transport: aggregate.transport,
|
| 680 |
+
volumeGB: roundVolume(averageVolume),
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
if (sceneLink.kind === 'row') {
|
| 684 |
+
rowLinks.push(sceneLink)
|
| 685 |
+
} else {
|
| 686 |
+
columnLinks.push(sceneLink)
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
for (const node of nodeByIndex.values()) {
|
| 690 |
+
if (node.domainIndex === aggregate.fromRack || node.domainIndex === aggregate.toRack) {
|
| 691 |
+
pushLoad(interLoads, node.index, averageLoad)
|
| 692 |
+
}
|
| 693 |
+
}
|
| 694 |
+
}
|
| 695 |
+
|
| 696 |
+
for (const node of nodeByIndex.values()) {
|
| 697 |
+
node.localFabricLoad = average(localLoads.get(node.index) ?? [])
|
| 698 |
+
node.interNodeLoad = average(interLoads.get(node.index) ?? [])
|
| 699 |
+
|
| 700 |
+
for (const gpu of node.gpus) {
|
| 701 |
+
gpu.linkLoad = clamp(node.localFabricLoad * 0.7 + node.interNodeLoad * 0.6, 0, 1)
|
| 702 |
+
}
|
| 703 |
+
}
|
| 704 |
+
|
| 705 |
+
for (const pod of pods) {
|
| 706 |
+
const rackNodes = Array.from(nodeByIndex.values()).filter((node) => node.domainIndex === pod.index)
|
| 707 |
+
pod.load = average(rackNodes.map((node) => node.interNodeLoad))
|
| 708 |
+
pod.thermal = average(rackNodes.map((node) => node.localFabricLoad))
|
| 709 |
+
pod.activeGpus = rackNodes.reduce((sum, node) => sum + node.gpus.filter((gpu) => gpu.active).length, 0)
|
| 710 |
+
pod.totalGpus = rackNodes.reduce((sum, node) => sum + node.gpus.length, 0)
|
| 711 |
+
}
|
| 712 |
+
|
| 713 |
+
const activePod =
|
| 714 |
+
[...pods].sort((left, right) => right.activeGpus - left.activeGpus)[0] ?? pods[0]
|
| 715 |
+
|
| 716 |
+
for (const pod of pods) {
|
| 717 |
+
pod.active = pod.id === activePod?.id
|
| 718 |
+
}
|
| 719 |
+
|
| 720 |
+
const racks: SceneRack[] = pods.map((pod) => ({
|
| 721 |
+
id: `rack-${pod.index}`,
|
| 722 |
+
index: pod.index,
|
| 723 |
+
x: pod.x,
|
| 724 |
+
y: pod.y,
|
| 725 |
+
width: pod.width,
|
| 726 |
+
height: pod.height,
|
| 727 |
+
load: pod.load,
|
| 728 |
+
nodeIds: Array.from(nodeByIndex.values())
|
| 729 |
+
.filter((node) => node.domainIndex === pod.index)
|
| 730 |
+
.map((node) => node.id),
|
| 731 |
+
hitBounds: pod.hitBounds,
|
| 732 |
+
focusFrame: pod.focusFrame,
|
| 733 |
+
}))
|
| 734 |
+
|
| 735 |
+
const nodes = [...nodeByIndex.values()].sort((left, right) => left.index - right.index)
|
| 736 |
+
|
| 737 |
+
return {
|
| 738 |
+
width,
|
| 739 |
+
height,
|
| 740 |
+
podColumns: layout.rackColumns,
|
| 741 |
+
podRows: layout.rackRows,
|
| 742 |
+
podWidth: rackWidth,
|
| 743 |
+
podHeight: rackHeight,
|
| 744 |
+
nodeWidth,
|
| 745 |
+
nodeHeight,
|
| 746 |
+
activePodId: activePod?.id ?? 'pod-0',
|
| 747 |
+
activePodBounds: activePod?.hitBounds ?? {
|
| 748 |
+
x: 0,
|
| 749 |
+
y: 0,
|
| 750 |
+
width,
|
| 751 |
+
height,
|
| 752 |
+
},
|
| 753 |
+
contextualNodeCount: cluster.numNodes,
|
| 754 |
+
lodPolicy: TOPOLOGY_LOD_POLICY,
|
| 755 |
+
pods,
|
| 756 |
+
racks,
|
| 757 |
+
nodes,
|
| 758 |
+
rowLinks,
|
| 759 |
+
columnLinks,
|
| 760 |
+
busLinks,
|
| 761 |
+
objectCounts: {
|
| 762 |
+
pods: pods.length,
|
| 763 |
+
nodes: nodes.length,
|
| 764 |
+
gpus: nodes.reduce((sum, node) => sum + node.gpus.length, 0),
|
| 765 |
+
links: rowLinks.length + columnLinks.length + busLinks.length,
|
| 766 |
+
activeGpus: analysis.gpuMap.filter((gpu) => gpu.isActive).length,
|
| 767 |
+
contextualNodes: cluster.numNodes,
|
| 768 |
+
},
|
| 769 |
+
}
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
/**
 * Arithmetic mean of `values`.
 *
 * @param values - Numbers to average.
 * @returns The mean, or 0 for an empty list (avoids a 0/0 NaN).
 */
function average(values: number[]) {
  const count = values.length
  if (count === 0) {
    return 0
  }

  let total = 0
  values.forEach((entry) => {
    total += entry
  })

  return total / count
}
|
| 779 |
+
|
| 780 |
+
/**
 * Rounds `value` to two decimal places using `Math.round` on the value scaled by 100.
 *
 * @param value - Number to round.
 * @returns `value` rounded to the nearest hundredth.
 */
function roundVolume(value: number) {
  const hundredths = Math.round(value * 100)
  return hundredths / 100
}
|
| 783 |
+
|
| 784 |
+
/**
 * Resolves the scene object under the point (`x`, `y`).
 *
 * Candidates are tried in a fixed priority order and the first hit wins:
 * GPUs (node by node), then nodes, then pods, then links (row, column, bus).
 * A link counts as hit when the point lies within half of its `hitWidth`
 * from the link's segment.
 *
 * NOTE(review): because links are tested last, any part of a link segment that
 * lies inside a GPU, node, or pod hit box is shadowed by that object — confirm
 * this is the intended priority.
 *
 * @param model - Scene model whose nodes, pods, and links are hit-tested.
 * @param x - X coordinate of the probe point, in the same space as the hit bounds.
 * @param y - Y coordinate of the probe point, in the same space as the hit bounds.
 * @returns The kind and id of the first object hit, or `null` when nothing is hit.
 */
export function findHoverTarget(
  model: TopologySceneModel,
  x: number,
  y: number,
): HoverTarget | null {
  // GPUs take priority over everything else.
  for (const node of model.nodes) {
    const gpuHit = node.gpus.find((gpu) => pointInBounds(gpu.hitBounds, x, y))
    if (gpuHit) {
      return { kind: 'gpu', id: gpuHit.id }
    }
  }

  const nodeHit = model.nodes.find((node) => pointInBounds(node.hitBounds, x, y))
  if (nodeHit) {
    return { kind: 'node', id: nodeHit.id }
  }

  const podHit = model.pods.find((pod) => pointInBounds(pod.hitBounds, x, y))
  if (podHit) {
    return { kind: 'pod', id: podHit.id }
  }

  // Links are probed group by group, preserving row -> column -> bus order.
  for (const group of [model.rowLinks, model.columnLinks, model.busLinks]) {
    for (const link of group) {
      const reach = link.hitWidth / 2
      if (distanceToSegment(x, y, link.x1, link.y1, link.x2, link.y2) <= reach) {
        return { kind: 'link', id: link.id }
      }
    }
  }

  return null
}
|
| 831 |
+
|
| 832 |
+
/**
 * Builds the detail content (heading, subheading, metric rows) for a hover target.
 *
 * @param model - Scene model searched for the pod, GPU, node, or link whose id
 *   matches the target.
 * @param viewModel - Supplies `config.cluster`: the rack/node labels (falling
 *   back to 'rack' / 'node') and `gpusPerNode`.
 * @param target - Target to describe; `null` yields `null`.
 * @returns The details for the target, or `null` when the target is null or no
 *   scene object with that id exists. Any kind other than 'pod', 'gpu', or
 *   'node' is looked up among the row, column, and bus links.
 */
export function describeTarget(
  model: TopologySceneModel,
  viewModel: WorkbenchViewModel,
  target: HoverTarget | null,
): TargetDetails | null {
  if (!target) {
    return null
  }

  const { cluster } = viewModel.config
  const rackLabel = cluster.rackLabel ?? 'rack'
  const nodeLabel = cluster.nodeLabel ?? 'node'
  const targetId = target.id

  switch (target.kind) {
    case 'pod': {
      const pod = model.pods.find((candidate) => candidate.id === targetId)
      if (!pod) {
        return null
      }

      // Node count is derived from the GPU total rather than stored on the pod.
      const nodeCount = Math.ceil(pod.totalGpus / cluster.gpusPerNode)

      return {
        kind: 'pod',
        id: pod.id,
        heading: `${rackLabel} ${pod.index + 1}`,
        subheading: `${pod.totalGpus} GPUs laid out across ${nodeCount} ${nodeLabel}s.`,
        metrics: [
          { label: 'Active GPUs', value: String(pod.activeGpus) },
          { label: 'Rack-local load', value: percent(pod.thermal) },
          { label: 'Scale-out load', value: percent(pod.load) },
          { label: 'Rack capacity', value: `${pod.totalGpus} GPUs` },
        ],
      }
    }

    case 'gpu': {
      const allGpus = model.nodes.flatMap((node) => node.gpus)
      const gpu = allGpus.find((candidate) => candidate.id === targetId)
      if (!gpu) {
        return null
      }

      // A GPU with no memory in use reports every placement coordinate as 'idle'.
      const allocated = gpu.memoryUsedGB > 0
      const placement = (prefix: string, zeroBased: number) =>
        allocated ? `${prefix}${zeroBased + 1}` : 'idle'

      const location = [
        `${rackLabel} ${gpu.domainIndex + 1}`,
        `${nodeLabel} ${gpu.domainLocalIndex + 1}`,
        `slot ${gpu.localIndex + 1}`,
      ].join(' · ')

      return {
        kind: 'gpu',
        id: gpu.id,
        heading: `GPU ${gpu.globalIndex + 1}`,
        subheading: location,
        metrics: [
          { label: 'Stage', value: placement('P', gpu.stage) },
          { label: 'Tensor lane', value: placement('T', gpu.tpLane) },
          { label: 'Context shard', value: placement('C', gpu.cpShard) },
          { label: 'Expert lane', value: placement('E', gpu.epLane) },
          { label: 'Data replica', value: placement('D', gpu.dpReplica) },
          { label: 'Replica group', value: placement('G', gpu.replicaGroup) },
          { label: 'FSDP rank', value: placement('F', gpu.fsdpRank) },
          { label: 'HBM', value: `${gpu.memoryUsedGB.toFixed(1)} / ${gpu.memoryCapacityGB.toFixed(0)} GB` },
          { label: 'Link load', value: percent(gpu.linkLoad) },
        ],
      }
    }

    case 'node': {
      const node = model.nodes.find((candidate) => candidate.id === targetId)
      if (!node) {
        return null
      }

      const nodeSize = cluster.gpusPerNode

      return {
        kind: 'node',
        id: node.id,
        heading: `${nodeLabel} ${node.domainLocalIndex + 1}`,
        subheading: `${rackLabel} ${node.domainIndex + 1} · ${node.activeCount}/${nodeSize} GPUs allocated`,
        metrics: [
          { label: 'Active GPUs', value: String(node.activeCount) },
          { label: 'Rack-local load', value: percent(node.localFabricLoad) },
          { label: 'Scale-out load', value: percent(node.interNodeLoad) },
          { label: 'Node size', value: `${nodeSize} GPUs` },
        ],
      }
    }

    default: {
      // Search order matches the combined row -> column -> bus list.
      const hasTargetId = (candidate: { id: string }) => candidate.id === targetId
      const link =
        model.rowLinks.find(hasTargetId) ??
        model.columnLinks.find(hasTargetId) ??
        model.busLinks.find(hasTargetId)
      if (!link) {
        return null
      }

      let subheading = 'Rack-local fabric segment'
      if (link.scope === 'rack') {
        subheading = 'Inter-rack fabric segment'
      } else if (link.transport === 'nvlink') {
        subheading = 'Local high-bandwidth GPU fabric'
      }

      return {
        kind: 'link',
        id: link.id,
        heading: link.title,
        subheading,
        metrics: [
          { label: 'Traffic', value: link.trafficType.toUpperCase() },
          { label: 'Transport', value: link.transport },
          { label: 'Utilization', value: percent(link.load) },
          { label: 'Volume', value: `${link.volumeGB.toFixed(2)} GB` },
        ],
      }
    }
  }
}
|
| 936 |
+
|
| 937 |
+
/**
 * Computes a viewport (scale plus x/y offset) that centers `bounds` inside a
 * `width` x `height` area, leaving `padding` on every side.
 *
 * The scale is the smaller of the horizontal and vertical fit ratios, passed
 * through `clamp` with the limits 0.04 and 32.
 *
 * @param bounds - Rectangle to frame.
 * @param width - Width of the area to fit into.
 * @param height - Height of the area to fit into.
 * @param padding - Margin reserved on each side; defaults to 26.
 * @returns The viewport state that centers `bounds` at the computed scale.
 */
export function getBoundsViewport(
  bounds: SceneHitBounds,
  width: number,
  height: number,
  padding = 26,
): ViewportState {
  const horizontalFit = (width - padding * 2) / bounds.width
  const verticalFit = (height - padding * 2) / bounds.height
  const scale = clamp(Math.min(horizontalFit, verticalFit), 0.04, 32)

  // Center the scaled bounds, then shift by the bounds' own scaled origin.
  const x = (width - bounds.width * scale) / 2 - bounds.x * scale
  const y = (height - bounds.height * scale) / 2 - bounds.y * scale

  return { scale, x, y }
}
|
| 955 |
+
|
| 956 |
+
/**
 * Computes the viewport that fits the whole scene into a `width` x `height` area.
 *
 * Delegates to `getBoundsViewport` with a rectangle anchored at the origin and
 * sized to the model, using that function's default padding.
 *
 * @param model - Scene model whose `width` and `height` define the full extent.
 * @param width - Width of the area to fit into.
 * @param height - Height of the area to fit into.
 * @returns The viewport state framing the entire scene.
 */
export function getFitViewport(
  model: TopologySceneModel,
  width: number,
  height: number,
): ViewportState {
  const sceneBounds: SceneHitBounds = {
    x: 0,
    y: 0,
    width: model.width,
    height: model.height,
  }

  return getBoundsViewport(sceneBounds, width, height)
}
|
| 972 |
+
|
| 973 |
+
/**
 * Applies a viewport transform to a rectangle: positions are scaled and then
 * offset by the viewport's x/y, sizes are scaled only.
 *
 * @param bounds - Rectangle to transform.
 * @param viewport - Scale and x/y offset to apply.
 * @returns A new rectangle `{ x, y, width, height }` with the transform applied.
 */
export function worldToScreen(bounds: SceneHitBounds, viewport: ViewportState) {
  const { scale, x: offsetX, y: offsetY } = viewport

  return {
    x: bounds.x * scale + offsetX,
    y: bounds.y * scale + offsetY,
    width: bounds.width * scale,
    height: bounds.height * scale,
  }
}
|
src/lib/trainingClusterModel.ts
ADDED
|
@@ -0,0 +1,1882 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Describes the model being trained: a dense or mixture-of-experts
 * architecture, its layer/head/vocabulary dimensions, and optional attention
 * and MoE sub-settings.
 *
 * NOTE(review): units and valid ranges are not declared here — confirm against
 * the code that consumes this config.
 */
export interface ModelConfig {
|
| 2 |
+
architecture: 'dense' | 'moe'
|
| 3 |
+
hiddenDim: number
|
| 4 |
+
numLayers: number
|
| 5 |
+
numHeads: number
|
| 6 |
+
numKVHeads: number
|
| 7 |
+
vocabSize: number
|
| 8 |
+
intermediateSize: number
|
| 9 |
+
tiedEmbeddings: boolean
|
| 10 |
+
// Optional attention settings: 'full' or 'hybrid', with optional window/global-attention knobs.
attentionProfile?: {
|
| 11 |
+
type: 'full' | 'hybrid'
|
| 12 |
+
slidingWindowSize?: number
|
| 13 |
+
globalAttentionFraction?: number
|
| 14 |
+
globalAttentionEveryN?: number
|
| 15 |
+
}
|
| 16 |
+
// Optional expert settings — presumably only meaningful when architecture is 'moe'; verify against consumers.
moe?: {
|
| 17 |
+
numExperts: number
|
| 18 |
+
expertsPerToken: number
|
| 19 |
+
numDenseLayers: number
|
| 20 |
+
expertIntermediateSize: number
|
| 21 |
+
activeParamsPerToken?: number
|
| 22 |
+
}
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
/**
 * Training-run settings: batch shape (micro-batch size, sequence length,
 * gradient-accumulation steps), numeric precision, the activation
 * checkpointing toggle, and the optimizer choice.
 */
export interface TrainingConfig {
|
| 26 |
+
microBatchSize: number
|
| 27 |
+
seqLength: number
|
| 28 |
+
gradAccumSteps: number
|
| 29 |
+
precision: 'fp32' | 'bf16' | 'fp16' | 'fp8'
|
| 30 |
+
activationCheckpointing: boolean
|
| 31 |
+
optimizer: 'adam' | 'adamw' | 'sgd' | 'muon'
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
/**
 * Hardware description of a single GPU type.
 *
 * NOTE(review): units are implied only by the field-name suffixes
 * (GB, TFLOPs, TB/s) — confirm against the code that populates these values.
 */
export interface GPUSpec {
|
| 35 |
+
name: string
|
| 36 |
+
hbmCapacityGB: number
|
| 37 |
+
peakTFLOPsBF16: number
|
| 38 |
+
memBandwidthTBs: number
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
/**
 * Cluster shape: the GPU type, GPUs per node, node count, intra-/inter-node
 * bandwidth figures, and optional rack grouping plus display labels.
 */
export interface ClusterConfig {
|
| 42 |
+
gpuType: GPUSpec
|
| 43 |
+
gpusPerNode: number
|
| 44 |
+
numNodes: number
|
| 45 |
+
intraNodeBandwidthGBs: number
|
| 46 |
+
interNodeBandwidthGBs: number
|
| 47 |
+
// Optional rack grouping; how an omitted value is defaulted is not visible in this declaration.
nodesPerRack?: number
|
| 48 |
+
// Optional display labels; the scene code falls back to 'rack' / 'node' when rackLabel / nodeLabel are omitted.
rackLabel?: string
|
| 49 |
+
nodeLabel?: string
|
| 50 |
+
podLabel?: string
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
/**
 * Parallelism degrees and sharding options for a training run.
 *
 * NOTE(review): `tp` / `pp` / `cp` / `ep` presumably stand for tensor,
 * pipeline, context, and expert parallel sizes — confirm against the analysis
 * code that reads them.
 */
export interface ParallelismConfig {
|
| 54 |
+
tp: number
|
| 55 |
+
pp: number
|
| 56 |
+
cp: number
|
| 57 |
+
ep: number
|
| 58 |
+
distributedOptimizer: boolean
|
| 59 |
+
fsdpShardGroupSize: number
|
| 60 |
+
// Restricted to the literal stages 0 through 3.
zeroStage: 0 | 1 | 2 | 3
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/** Full result returned by `analyzeCluster`. */
export interface ClusterAnalysis {
  /** False when the configuration cannot be analyzed or does not fit. */
  feasible: boolean
  /** Human-readable explanation, present when `feasible` is false. */
  infeasibilityReason?: string
  totalParams: number
  activeParamsPerToken: number
  globalBatchSizeTokens: number
  totalGPUs: number

  /** Degrees derived from the cluster size and the requested parallelism. */
  derivedParallelism: {
    dp: number
    replicaGroups: number
    fsdpShardGroupSize: number
    fsdpGroupSize: number
    ep: number
  }

  /** Memory footprint split by category, alongside the GPU's HBM capacity. */
  memoryBreakdown: {
    parametersGB: number
    optimizerStatesGB: number
    gradientsGB: number
    activationsGB: number
    totalGB: number
    hbmCapacityGB: number
    utilizationPercent: number
  }

  /** One entry per pipeline stage. */
  pipelineStages: {
    stageIndex: number
    layerRange: [number, number]
    numLayers: number
    memoryGB: number
    hasEmbedding: boolean
    hasOutputHead: boolean
  }[]

  /** Communication estimates, one group per parallelism dimension. */
  communication: {
    tp: {
      allReducesPerLayer: number
      messageSizeBytes: number
      totalVolumePerStepGB: number
      timePerStepMs: number
      linkUtilizationPercent: number
    }
    pp: {
      activationMessageSizeBytes: number
      numP2PTransfersPerStep: number
      totalVolumePerStepGB: number
      timePerStepMs: number
      usesInterNode: boolean
    }
    cp: {
      collectivesPerLayer: number
      messageSizeBytes: number
      totalVolumePerStepGB: number
      timePerStepMs: number
      linkUtilizationPercent: number
      usesInterNode: boolean
    }
    fsdp: {
      collectivesPerLayer: number
      messageSizeBytes: number
      totalVolumePerStepGB: number
      timePerStepMs: number
      linkUtilizationPercent: number
      usesInterNode: boolean
    }
    ep: {
      allToAllsPerLayer: number
      messageSizeBytes: number
      totalVolumePerStepGB: number
      timePerStepMs: number
      linkUtilizationPercent: number
      usesInterNode: boolean
    }
    dp: {
      gradientVolumePerGPU_GB: number
      allReduceTimeMs: number
      canOverlapWithBackward: boolean
      linkUtilizationPercent: number
    }
  }

  /** Step-time and efficiency estimates. */
  throughput: {
    computeTimePerStepMs: number
    communicationTimePerStepMs: number
    pipelineBubbleFraction: number
    pipelineBubbleTimeMs: number
    totalStepTimeMs: number
    tokensPerSecond: number
    mfu: number
  }

  /** One entry per physical GPU; unassigned GPUs carry -1 in every rank field. */
  gpuMap: {
    globalGPUIndex: number
    nodeIndex: number
    localGPUIndex: number
    tpGroup: number
    tpLane: number
    ppStage: number
    cpShard: number
    epLane: number
    dpReplica: number
    replicaGroup: number
    fsdpRank: number
    memoryUsedGB: number
    memoryCapacityGB: number
    isActive: boolean
  }[]

  /** GPU-to-GPU links annotated with the traffic they carry. */
  links: {
    fromGPU: number
    toGPU: number
    type: 'nvlink' | 'infiniband'
    trafficType: 'tp' | 'pp' | 'cp' | 'fsdp' | 'ep' | 'dp'
    volumeGB: number
    utilizationPercent: number
  }[]
}
|
| 174 |
+
|
| 175 |
+
/** Contiguous range of layers assigned to one pipeline stage. */
type LayerDistribution = {
  stageIndex: number
  /** Index of the first layer on this stage. */
  startLayer: number
  /** Index of the last layer on this stage (inclusive). */
  endLayer: number
  /** Number of layers on this stage. */
  numLayers: number
}
|
| 181 |
+
|
| 182 |
+
/** Memory estimate for one pipeline stage, split by category (all values in GB). */
type StageMemory = {
  parametersGB: number
  optimizerStatesGB: number
  gradientsGB: number
  activationsGB: number
  /** Sum of the four categories above. */
  totalGB: number
}
|
| 189 |
+
|
| 190 |
+
/** Parameter totals and layer mix for one pipeline stage. */
type StageParameterCount = {
  /** sharedParams + expertParams. */
  stageParams: number
  /** Non-expert parameters, including embedding / final norm / output head when hosted here. */
  sharedParams: number
  /** Parameters of all experts in the stage's MoE layers. */
  expertParams: number
  denseLayers: number
  moeLayers: number
  /** True for the first pipeline stage. */
  hasEmbedding: boolean
  /** True for the last pipeline stage. */
  hasOutputHead: boolean
}
|
| 199 |
+
|
| 200 |
+
/**
 * Assignment of one physical GPU to its coordinates in every parallelism
 * dimension. Idle GPUs use -1 for every rank field and `isActive: false`.
 */
type PlacementEntry = {
  globalGPUIndex: number
  nodeIndex: number
  /** Position of the GPU within its node. */
  localGPUIndex: number
  tpGroup: number
  tpLane: number
  ppStage: number
  cpShard: number
  epLane: number
  dpReplica: number
  replicaGroup: number
  fsdpRank: number
  isActive: boolean
}
|
| 214 |
+
|
| 215 |
+
/** Degrees computed from the cluster size and the requested parallelism. */
type DerivedParallelism = {
  /** tp * pp * cp * ep. */
  modelParallelSize: number
  /** Total GPUs divided by the model-parallel size. */
  dp: number
  /** Total GPUs divided by the FSDP group size. */
  replicaGroups: number
  /** GPUs per FSDP group (falls back to the model-parallel size when FSDP is off). */
  fsdpGroupSize: number
  /** Data-parallel ranks inside one FSDP group. */
  fsdpDataParallelDegree: number
}
|
| 222 |
+
|
| 223 |
+
/** Shape of the parameter breakdown, kept in sync with `getModelBreakdown` by inference. */
type ModelBreakdown = ReturnType<typeof getModelBreakdown>
|
| 224 |
+
|
| 225 |
+
/** Per-step cost estimate for a ring-style collective. */
type RingCommStats = {
  /** Bytes each GPU moves per step. */
  volumeBytesPerGpu: number
  /** Bytes moved per step summed over every GPU in every group. */
  totalVolumeBytes: number
  timePerStepMs: number
  /** Share of the link's capacity used over the step, capped to 0-100. */
  linkUtilizationPercent: number
  /** True when the collective's members sit on more than one node. */
  usesInterNode: boolean
}
|
| 232 |
+
|
| 233 |
+
// Decimal gigabyte used for every byte <-> GB conversion in this module.
const BYTES_PER_GB = 1e9

// Modeled number of collectives issued per transformer layer, by parallelism dimension.
const TP_ALL_REDUCES_PER_LAYER = 4
const CP_COLLECTIVES_PER_LAYER = 2
const FSDP_COLLECTIVES_PER_LAYER = 4
const EP_ALL_TO_ALLS_PER_LAYER = 2

// Baseline fraction of peak compute assumed to be sustained, before adjustments.
const DEFAULT_BF16_EFFICIENCY = 0.56
|
| 239 |
+
|
| 240 |
+
/**
 * Restricts `value` to the range [min, max].
 * The upper bound is applied last, so `max` wins if the bounds are inverted.
 */
const clamp = (value: number, min: number, max: number) => {
  const raisedToFloor = Math.max(value, min)
  return Math.min(raisedToFloor, max)
}
|
| 242 |
+
|
| 243 |
+
/** Converts a byte count to gigabytes using the module's `BYTES_PER_GB` divisor. */
const bytesToGB = (bytes: number) => {
  return bytes / BYTES_PER_GB
}
|
| 244 |
+
|
| 245 |
+
/** Rounds `value` to two decimal places. */
const round2 = (value: number) => {
  const hundredths = Math.round(value * 100)
  return hundredths / 100
}
|
| 246 |
+
|
| 247 |
+
/**
 * Bytes used to store one model parameter at the given training precision:
 * 4 for fp32, 1 for fp8, and 2 for everything else (bf16 / fp16).
 */
const getParameterBytes = (precision: TrainingConfig['precision']) => {
  if (precision === 'fp32') {
    return 4
  }

  if (precision === 'fp8') {
    return 1
  }

  return 2
}
|
| 257 |
+
|
| 258 |
+
/** Bytes per activation element: 4 in fp32, 2 for every other precision (fp8 included). */
const getActivationBytes = (precision: TrainingConfig['precision']) => {
  if (precision === 'fp32') {
    return 4
  }

  return 2
}
|
| 260 |
+
|
| 261 |
+
/** Bytes per gradient element: 4 in fp32, 2 for every other precision (fp8 included). */
const getGradientBytes = (precision: TrainingConfig['precision']) => {
  if (precision === 'fp32') {
    return 4
  }

  return 2
}
|
| 263 |
+
|
| 264 |
+
/**
 * Optimizer-state bytes kept per parameter.
 *
 * - sgd: 4 bytes, regardless of precision.
 * - muon: 8 bytes. Muon keeps lower optimizer state than Adam-family optimizers in
 *   practice; it is modeled as 8 bytes per parameter of extra state on top of bf16 weights.
 * - adam / adamw: 8 bytes in fp32, 12 bytes for every other precision.
 */
const getOptimizerBytesPerParam = (
  optimizer: TrainingConfig['optimizer'],
  precision: TrainingConfig['precision'],
) => {
  switch (optimizer) {
    case 'sgd':
      return 4
    case 'muon':
      return 8
    default:
      return precision === 'fp32' ? 8 : 12
  }
}
|
| 280 |
+
|
| 281 |
+
/**
 * Scales the GPU's BF16 peak to the requested precision:
 * fp32 is modeled at a quarter of it, fp8 at double, anything else at the BF16 figure.
 */
const getPeakTFLOPsForPrecision = (gpu: GPUSpec, precision: TrainingConfig['precision']) => {
  const bf16Peak = gpu.peakTFLOPsBF16

  if (precision === 'fp32') {
    return bf16Peak * 0.25
  }

  if (precision === 'fp8') {
    return bf16Peak * 2
  }

  return bf16Peak
}
|
| 291 |
+
|
| 292 |
+
/**
 * Estimates the fraction of peak compute that training sustains.
 *
 * Starts from `DEFAULT_BF16_EFFICIENCY`, subtracts 0.02 when activation
 * checkpointing is on and 0.08 for fp32, adds 0.02 for the muon optimizer,
 * and clamps the result to [0.3, 0.62].
 */
const getSustainedComputeEfficiency = (training: TrainingConfig) => {
  let efficiency = DEFAULT_BF16_EFFICIENCY

  efficiency -= training.activationCheckpointing ? 0.02 : 0
  efficiency -= training.precision === 'fp32' ? 0.08 : 0
  efficiency += training.optimizer === 'muon' ? 0.02 : 0

  return clamp(efficiency, 0.3, 0.62)
}
|
| 299 |
+
|
| 300 |
+
/**
 * Splits `numLayers` into `pp` contiguous pipeline stages.
 *
 * Every stage gets floor(numLayers / pp) layers and the first
 * (numLayers % pp) stages get one extra, so earlier stages are never smaller
 * than later ones. Returns one entry per stage, in stage order.
 */
const distributeLayers = (numLayers: number, pp: number): LayerDistribution[] => {
  const layersPerStage = Math.floor(numLayers / pp)
  const stagesWithExtraLayer = numLayers % pp
  const stages: LayerDistribution[] = []
  let nextStart = 0

  Array.from({ length: pp }).forEach((_, stageIndex) => {
    const extra = stageIndex < stagesWithExtraLayer ? 1 : 0
    const layerCount = layersPerStage + extra

    stages.push({
      stageIndex,
      startLayer: nextStart,
      endLayer: nextStart + layerCount - 1,
      numLayers: layerCount,
    })
    nextStart += layerCount
  })

  return stages
}
|
| 319 |
+
|
| 320 |
+
/**
 * Picks default intra-/inter-node bandwidths from the GPU's product name.
 *
 * The name is matched case-insensitively by substring; 'gb200' is checked
 * before 'h100', and unrecognized names fall back to 300 / 50.
 */
const getDefaultFabric = (gpu: GPUSpec) => {
  const lowerCaseName = gpu.name.toLowerCase()
  const knownFabrics = [
    { marker: 'gb200', intraNodeBandwidthGBs: 900, interNodeBandwidthGBs: 100 },
    { marker: 'h100', intraNodeBandwidthGBs: 450, interNodeBandwidthGBs: 100 },
  ]

  for (const { marker, ...fabric } of knownFabrics) {
    if (lowerCaseName.includes(marker)) {
      return fabric
    }
  }

  return {
    intraNodeBandwidthGBs: 300,
    interNodeBandwidthGBs: 50,
  }
}
|
| 342 |
+
|
| 343 |
+
/**
 * Derives parameter counts for a model configuration.
 *
 * Attention is counted as Q + K + V + output projections (K/V sized by
 * `numKVHeads`), MLPs as three hiddenDim x intermediate matrices, and each
 * layer carries two norm vectors. MoE layers replace the dense MLP with
 * `numExperts` expert MLPs.
 *
 * Expert settings apply only when `architecture` is 'moe' AND `moe` is
 * present; otherwise every layer is treated as dense. `moe.numDenseLayers`
 * is capped to [0, numLayers] so an out-of-range value cannot produce a
 * negative MoE layer count (per-stage accounting already caps it the same way).
 *
 * @returns the per-component counts plus `totalParams` and `activeParamsPerToken`
 *   (the explicit `moe.activeParamsPerToken` override wins when provided).
 */
const getModelBreakdown = (model: ModelConfig) => {
  const moe = model.architecture === 'moe' ? model.moe : undefined

  const headDim = model.hiddenDim / model.numHeads
  const embeddingParams = model.vocabSize * model.hiddenDim
  const kvProjectionDim = model.numKVHeads * headDim

  const perLayerAttentionParams =
    model.hiddenDim * (model.hiddenDim + 2 * kvProjectionDim + model.hiddenDim)
  const perLayerDenseMlpParams = model.hiddenDim * model.intermediateSize * 3
  const perLayerNormParams = model.hiddenDim * 2
  const finalNormParams = model.hiddenDim
  const outputHeadParams = model.tiedEmbeddings ? 0 : embeddingParams
  const perExpertParams = moe ? model.hiddenDim * moe.expertIntermediateSize * 3 : 0
  const totalExpertParamsPerLayer = moe ? perExpertParams * moe.numExperts : 0

  // Cap the dense prefix so dense + MoE layers always add up to numLayers.
  const denseLayerCount = moe
    ? Math.min(Math.max(moe.numDenseLayers, 0), model.numLayers)
    : model.numLayers
  const moeLayerCount = model.numLayers - denseLayerCount

  const sharedDenseLayerParams =
    perLayerAttentionParams + perLayerDenseMlpParams + perLayerNormParams
  const sharedMoeLayerParams = perLayerAttentionParams + perLayerNormParams
  const sharedParams =
    embeddingParams +
    denseLayerCount * sharedDenseLayerParams +
    moeLayerCount * sharedMoeLayerParams +
    finalNormParams +
    outputHeadParams
  const totalParams = sharedParams + moeLayerCount * totalExpertParamsPerLayer

  // Only the routed experts of each MoE layer are active for a given token.
  const derivedActiveParams = moe
    ? embeddingParams +
      denseLayerCount * sharedDenseLayerParams +
      moeLayerCount * (sharedMoeLayerParams + moe.expertsPerToken * perExpertParams) +
      finalNormParams +
      outputHeadParams
    : totalParams
  const activeParamsPerToken = moe?.activeParamsPerToken ?? derivedActiveParams

  // Without expert settings the layers are dense, so report the dense per-layer size.
  const perLayerTotalParams = moe
    ? sharedMoeLayerParams + totalExpertParamsPerLayer
    : sharedDenseLayerParams

  return {
    headDim,
    kvProjectionDim,
    embeddingParams,
    perLayerAttentionParams,
    perLayerDenseMlpParams,
    perLayerNormParams,
    perExpertParams,
    totalExpertParamsPerLayer,
    sharedDenseLayerParams,
    sharedMoeLayerParams,
    denseLayerCount,
    moeLayerCount,
    sharedParams,
    perLayerTotalParams,
    finalNormParams,
    outputHeadParams,
    totalParams,
    activeParamsPerToken,
  }
}
|
| 413 |
+
|
| 414 |
+
/**
 * Number of micro-batches whose activations are modeled as resident at once.
 *
 * Without pipelining (pp <= 1) this is 1; otherwise it is the smaller of the
 * gradient-accumulation steps and the pipeline depth, never below 1.
 */
const getConcurrentMicroBatches = (
  training: TrainingConfig,
  parallelism: ParallelismConfig,
) => {
  const pipelineDepth = parallelism.pp

  return pipelineDepth <= 1
    ? 1
    : Math.max(1, Math.min(training.gradAccumSteps, pipelineDepth))
}
|
| 424 |
+
|
| 425 |
+
/**
 * Discount factor (0-1) applied to attention activations for hybrid attention.
 *
 * Full attention (or no profile) returns 1. For hybrid attention the result
 * blends a global share at full cost with the remaining share at the
 * sliding-window cost (window / seqLength, capped to 0-1). The global share
 * is `globalAttentionFraction` if set, else 1 / `globalAttentionEveryN`,
 * else 0.25. The blend is kept between the window cost and 1.
 */
const getAttentionMultiplier = (model: ModelConfig, seqLength: number) => {
  const attention = model.attentionProfile
  if (!attention || attention.type === 'full') {
    return 1
  }

  let windowCost = 1
  if (attention.slidingWindowSize != null) {
    windowCost = clamp(attention.slidingWindowSize / seqLength, 0, 1)
  }

  let globalShare: number
  if (attention.globalAttentionFraction != null) {
    globalShare = attention.globalAttentionFraction
  } else if (attention.globalAttentionEveryN != null) {
    globalShare = 1 / attention.globalAttentionEveryN
  } else {
    globalShare = 0.25
  }

  const blendedCost = globalShare + (1 - globalShare) * windowCost
  return clamp(blendedCost, windowCost, 1)
}
|
| 441 |
+
|
| 442 |
+
/**
 * Counts how many of a stage's layers are dense versus MoE.
 *
 * Dense models (or MoE models without expert settings) are all dense.
 * Otherwise layers with index below `moe.numDenseLayers` are dense, and the
 * stage's dense count is the overlap of that prefix with the stage's range.
 */
const getStageLayerMix = (stage: LayerDistribution, model: ModelConfig) => {
  if (model.architecture !== 'moe' || !model.moe) {
    return {
      denseLayers: stage.numLayers,
      moeLayers: 0,
    }
  }

  const lastDenseLayer = model.moe.numDenseLayers - 1
  let denseLayers = 0

  // The dense prefix reaches into this stage only if it extends to the stage's first layer.
  if (!(lastDenseLayer < stage.startLayer)) {
    const lastDenseLayerOnStage = Math.min(stage.endLayer, lastDenseLayer)
    denseLayers = Math.max(0, lastDenseLayerOnStage - stage.startLayer + 1)
  }

  return {
    denseLayers,
    moeLayers: stage.numLayers - denseLayers,
  }
}
|
| 461 |
+
|
| 462 |
+
/**
 * Totals the parameters hosted by one pipeline stage.
 *
 * Shared (non-expert) parameters cover the stage's dense and MoE layers, plus
 * the embedding on the first stage and the final norm + output head on the
 * last stage. Expert parameters cover all experts of the stage's MoE layers.
 */
const getStageParameterCount = (
  stage: LayerDistribution,
  modelBreakdown: ModelBreakdown,
  parallelism: ParallelismConfig,
  model: ModelConfig,
): StageParameterCount => {
  const { denseLayers, moeLayers } = getStageLayerMix(stage, model)
  const hasEmbedding = stage.stageIndex === 0
  const hasOutputHead = stage.stageIndex === parallelism.pp - 1

  const layerParams =
    denseLayers * modelBreakdown.sharedDenseLayerParams +
    moeLayers * modelBreakdown.sharedMoeLayerParams
  const embeddingExtra = hasEmbedding ? modelBreakdown.embeddingParams : 0
  const outputExtra = hasOutputHead
    ? modelBreakdown.finalNormParams + modelBreakdown.outputHeadParams
    : 0

  const sharedParams = layerParams + embeddingExtra + outputExtra
  const expertParams = moeLayers * modelBreakdown.totalExpertParamsPerLayer

  return {
    stageParams: sharedParams + expertParams,
    sharedParams,
    expertParams,
    denseLayers,
    moeLayers,
    hasEmbedding,
    hasOutputHead,
  }
}
|
| 494 |
+
|
| 495 |
+
/**
 * Estimates activation bytes held per GPU for one layer and one micro-batch.
 *
 * The estimate combines the residual-stream ("hidden state") footprint with
 * the attention QKV and MLP intermediates. With activation checkpointing it
 * keeps 2x hidden state + 0.25x intermediates; without it, 6x hidden state +
 * 2x intermediates.
 */
const getActivationMemoryBytesPerLayer = ({
  model,
  training,
  parallelism,
  isMoeLayer,
}: {
  model: ModelConfig
  training: TrainingConfig
  parallelism: ParallelismConfig
  isMoeLayer: boolean
}) => {
  const bytesPerElement = getActivationBytes(training.precision)

  // Context parallelism splits the sequence, so each GPU only holds its shard's tokens.
  const localSeqLength = training.seqLength / parallelism.cp
  const localTokens = training.microBatchSize * localSeqLength
  const kvWidth = model.numKVHeads * (model.hiddenDim / model.numHeads)

  // Sequence parallelism shards the residual stream and checkpointed layer boundaries
  // across the TP group. TP-enabled training is assumed to use this Megatron-style
  // optimization.
  let residualShardFactor = 1
  if (parallelism.tp > 1) {
    residualShardFactor = parallelism.tp
  }
  const residualBytes =
    (localTokens * model.hiddenDim * bytesPerElement) / residualShardFactor

  const attentionDiscount = getAttentionMultiplier(model, training.seqLength)
  const attentionBytes =
    localTokens * (model.hiddenDim + 2 * kvWidth) * bytesPerElement * attentionDiscount

  // MoE layers hold the routed experts' intermediates, spread over the EP group;
  // an MoE layer without expert settings contributes no MLP bytes.
  let mlpBytes: number
  if (!isMoeLayer) {
    mlpBytes = localTokens * model.intermediateSize * bytesPerElement * 2
  } else if (model.moe) {
    mlpBytes =
      (localTokens *
        model.moe.expertIntermediateSize *
        bytesPerElement *
        model.moe.expertsPerToken *
        2) /
      Math.max(parallelism.ep, 1)
  } else {
    mlpBytes = 0
  }

  // Attention and MLP intermediates are split across the tensor-parallel group.
  const intermediateBytes = (attentionBytes + mlpBytes) / Math.max(parallelism.tp, 1)

  return training.activationCheckpointing
    ? residualBytes * 2 + intermediateBytes * 0.25
    : residualBytes * 6 + intermediateBytes * 2
}
|
| 539 |
+
|
| 540 |
+
/**
 * Estimates per-GPU memory (GB) for one pipeline stage, split into
 * parameters, optimizer state, gradients and activations.
 *
 * Shared parameters are divided by TP and expert parameters by TP * EP.
 * ZeRO stage then decides what is sharded further: stage >= 1 shards
 * optimizer state, >= 2 gradients, >= 3 parameters. The shard factor is the
 * FSDP data-parallel degree when FSDP is on (shard group size > 1);
 * otherwise optimizer state uses DP only if the distributed optimizer is
 * enabled, and gradients use DP.
 */
const getStageMemory = (
  stageParams: StageParameterCount,
  model: ModelConfig,
  training: TrainingConfig,
  parallelism: ParallelismConfig,
  derivedParallelism: DerivedParallelism,
) => {
  const usesFsdp = parallelism.fsdpShardGroupSize > 1
  const fsdpFactor = usesFsdp ? derivedParallelism.fsdpDataParallelDegree : 1
  const distributedOptimizerFactor = parallelism.distributedOptimizer
    ? derivedParallelism.dp
    : 1

  const parameterShardFactor = parallelism.zeroStage >= 3 ? fsdpFactor : 1

  let optimizerShardFactor = 1
  if (parallelism.zeroStage >= 1) {
    optimizerShardFactor = usesFsdp ? fsdpFactor : distributedOptimizerFactor
  }

  let gradientShardFactor = 1
  if (parallelism.zeroStage >= 2) {
    gradientShardFactor = usesFsdp ? fsdpFactor : derivedParallelism.dp
  }

  const sharedParamsLocal = stageParams.sharedParams / Math.max(parallelism.tp, 1)
  const expertParamsLocal =
    stageParams.expertParams / Math.max(parallelism.tp * parallelism.ep, 1)

  // Bytes for the locally held parameters after sharding, at a given width per parameter.
  const shardedBytes = (shardFactor: number, bytesPerParam: number) =>
    (sharedParamsLocal / shardFactor + expertParamsLocal / shardFactor) * bytesPerParam

  const parameterMemoryBytes = shardedBytes(
    parameterShardFactor,
    getParameterBytes(training.precision),
  )
  const optimizerMemoryBytes = shardedBytes(
    optimizerShardFactor,
    getOptimizerBytesPerParam(training.optimizer, training.precision),
  )
  const gradientMemoryBytes = shardedBytes(
    gradientShardFactor,
    getGradientBytes(training.precision),
  )

  const activationBytesFor = (isMoeLayer: boolean) =>
    getActivationMemoryBytesPerLayer({ model, training, parallelism, isMoeLayer })
  const perDenseLayerBytes = activationBytesFor(false)
  const perMoeLayerBytes = activationBytesFor(true)
  const inFlightMicroBatches = getConcurrentMicroBatches(training, parallelism)

  let activationMemoryBytes =
    (perDenseLayerBytes * stageParams.denseLayers +
      perMoeLayerBytes * stageParams.moeLayers) *
    inFlightMicroBatches

  // Checkpointing adds a working-set allowance of 1.5x the larger per-layer
  // footprint, for stages that hold any parameters.
  if (training.activationCheckpointing && stageParams.stageParams > 0) {
    activationMemoryBytes += Math.max(perDenseLayerBytes, perMoeLayerBytes) * 1.5
  }

  const totalBytes =
    parameterMemoryBytes + optimizerMemoryBytes + gradientMemoryBytes + activationMemoryBytes

  return {
    parametersGB: bytesToGB(parameterMemoryBytes),
    optimizerStatesGB: bytesToGB(optimizerMemoryBytes),
    gradientsGB: bytesToGB(gradientMemoryBytes),
    activationsGB: bytesToGB(activationMemoryBytes),
    totalGB: bytesToGB(totalBytes),
  }
}
|
| 615 |
+
|
| 616 |
+
/**
 * Builds per-stage parameter counts and memory estimates for the whole model.
 *
 * @returns the model breakdown, the layer-to-stage distribution, and two maps
 *   keyed by stage index: `stageMemory` and `stageParameters`.
 */
const getStageMemoryMap = (
  model: ModelConfig,
  training: TrainingConfig,
  parallelism: ParallelismConfig,
  derivedParallelism: DerivedParallelism,
) => {
  const modelBreakdown = getModelBreakdown(model)
  const layerDistribution = distributeLayers(model.numLayers, parallelism.pp)
  const stageMemory = new Map<number, StageMemory>()
  const stageParameters = new Map<number, StageParameterCount>()

  layerDistribution.forEach((stage) => {
    const counts = getStageParameterCount(stage, modelBreakdown, parallelism, model)
    const memory = getStageMemory(counts, model, training, parallelism, derivedParallelism)

    stageParameters.set(stage.stageIndex, counts)
    stageMemory.set(stage.stageIndex, memory)
  })

  return {
    modelBreakdown,
    layerDistribution,
    stageMemory,
    stageParameters,
  }
}
|
| 643 |
+
|
| 644 |
+
/**
 * Assigns every GPU in the cluster to its parallelism coordinates.
 *
 * GPUs are handed out in nested order: replica group -> FSDP rank ->
 * pipeline stage -> context shard -> expert lane -> tensor lane. Each
 * (EP x TP) group is kept on a single node when it fits: if the group would
 * straddle a node boundary, placement skips to the next node first. A group
 * wider than a node cannot be kept together, so it is laid out GPU by GPU
 * across consecutive nodes instead. (Previously such a group skipped node 0
 * entirely and produced local indices beyond `gpusPerNode`.)
 *
 * Any GPUs left after the nested loops are appended as idle entries with -1
 * in every rank field.
 *
 * @param requiredGPUs entries with a global index below this are marked active
 * @returns one entry per placed GPU, in global-index order
 */
const buildPlacement = (
  cluster: ClusterConfig,
  parallelism: ParallelismConfig,
  derivedParallelism: DerivedParallelism,
  requiredGPUs: number,
) => {
  const totalGPUs = cluster.gpusPerNode * cluster.numNodes
  const groupWidth = parallelism.ep * parallelism.tp
  const groupFitsOnNode = groupWidth <= cluster.gpusPerNode
  const placement: PlacementEntry[] = []
  let nodeIndex = 0
  let localGPUIndex = 0
  let globalGPUIndex = 0

  // Moves to the first slot of the next node.
  const advanceNode = () => {
    nodeIndex += 1
    localGPUIndex = 0
  }

  for (let replicaGroup = 0; replicaGroup < derivedParallelism.replicaGroups; replicaGroup += 1) {
    for (let fsdpRank = 0; fsdpRank < derivedParallelism.fsdpDataParallelDegree; fsdpRank += 1) {
      const dpReplica = replicaGroup * derivedParallelism.fsdpDataParallelDegree + fsdpRank

      for (let ppStage = 0; ppStage < parallelism.pp; ppStage += 1) {
        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
          // Keep the EP x TP group on one node when that is possible at all.
          if (groupFitsOnNode && localGPUIndex + groupWidth > cluster.gpusPerNode) {
            advanceNode()
          }

          for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
            for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
              // Only reachable for groups wider than a node: spill onto the next node.
              if (localGPUIndex >= cluster.gpusPerNode) {
                advanceNode()
              }

              placement.push({
                globalGPUIndex,
                nodeIndex,
                localGPUIndex,
                tpGroup:
                  (((dpReplica * parallelism.pp + ppStage) * parallelism.cp + cpShard) *
                    parallelism.ep) +
                  epLane,
                tpLane,
                ppStage,
                cpShard,
                epLane,
                dpReplica,
                replicaGroup,
                fsdpRank,
                isActive: globalGPUIndex < requiredGPUs,
              })

              globalGPUIndex += 1
              localGPUIndex += 1
            }
          }
        }
      }
    }
  }

  // Pad with idle entries so the result has at least one entry per physical GPU.
  while (placement.length < totalGPUs) {
    if (localGPUIndex >= cluster.gpusPerNode) {
      advanceNode()
    }

    placement.push({
      globalGPUIndex,
      nodeIndex,
      localGPUIndex,
      tpGroup: -1,
      tpLane: -1,
      ppStage: -1,
      cpShard: -1,
      epLane: -1,
      dpReplica: -1,
      replicaGroup: -1,
      fsdpRank: -1,
      isActive: false,
    })
    globalGPUIndex += 1
    localGPUIndex += 1
  }

  return placement
}
|
| 722 |
+
|
| 723 |
+
/**
 * Returns the first placement entry matching every provided coordinate.
 *
 * Coordinates left out of `filters` (or set to null/undefined) are treated as
 * wildcards. Returns `undefined` when nothing matches.
 */
const getPlacementEntry = (
  placement: PlacementEntry[],
  filters: Partial<
    Pick<
      PlacementEntry,
      'dpReplica' | 'replicaGroup' | 'fsdpRank' | 'ppStage' | 'cpShard' | 'epLane' | 'tpLane'
    >
  >,
) => {
  const coordinates = [
    'dpReplica',
    'replicaGroup',
    'fsdpRank',
    'ppStage',
    'cpShard',
    'epLane',
    'tpLane',
  ] as const

  return placement.find((entry) =>
    coordinates.every((key) => filters[key] == null || entry[key] === filters[key]),
  )
}
|
| 742 |
+
|
| 743 |
+
/**
 * Derives the data-parallel and FSDP degrees implied by the cluster size and
 * the requested model parallelism.
 *
 * Returns `null` when the configuration cannot be laid out:
 * - any of tp / pp / cp / ep is not a positive integer (checking only their
 *   product would accept pairs of negative or fractional degrees),
 * - the cluster has no GPUs,
 * - the GPU count is not a multiple of the model-parallel size, or
 * - the FSDP group size is not a multiple of the model-parallel size, or does
 *   not divide the GPU count.
 *
 * FSDP is considered enabled when `fsdpShardGroupSize > 1`; otherwise the FSDP
 * group collapses to a single model replica.
 */
const getDerivedParallelism = (
  cluster: ClusterConfig,
  parallelism: ParallelismConfig,
): DerivedParallelism | null => {
  const totalGPUs = cluster.gpusPerNode * cluster.numNodes
  const degrees = [parallelism.tp, parallelism.pp, parallelism.cp, parallelism.ep]
  const degreesAreValid = degrees.every((degree) => Number.isInteger(degree) && degree > 0)

  if (!degreesAreValid || !(totalGPUs > 0)) {
    return null
  }

  const modelParallelSize =
    parallelism.tp * parallelism.pp * parallelism.cp * parallelism.ep

  if (totalGPUs % modelParallelSize !== 0) {
    return null
  }

  const dp = totalGPUs / modelParallelSize
  const fsdpGroupSize =
    parallelism.fsdpShardGroupSize > 1 ? parallelism.fsdpShardGroupSize : modelParallelSize

  if (fsdpGroupSize % modelParallelSize !== 0 || totalGPUs % fsdpGroupSize !== 0) {
    return null
  }

  return {
    modelParallelSize,
    dp,
    replicaGroups: totalGPUs / fsdpGroupSize,
    fsdpGroupSize,
    fsdpDataParallelDegree: fsdpGroupSize / modelParallelSize,
  }
}
|
| 771 |
+
|
| 772 |
+
/**
 * Chooses the bandwidth that bounds a collective over `members`.
 *
 * If the members sit on more than one node the inter-node bandwidth applies;
 * otherwise (including groups of zero or one member) the intra-node bandwidth.
 */
const getMaxBandwidthForCollective = (
  members: PlacementEntry[],
  cluster: ClusterConfig,
) => {
  const distinctNodes = new Set(members.map(({ nodeIndex }) => nodeIndex))
  const usesInterNode = members.length > 1 && distinctNodes.size > 1
  const bandwidthGBs = usesInterNode
    ? cluster.interNodeBandwidthGBs
    : cluster.intraNodeBandwidthGBs

  return { bandwidthGBs, usesInterNode }
}
|
| 793 |
+
|
| 794 |
+
/**
 * Estimates the per-step cost of a ring collective.
 *
 * Each GPU moves 2 * (width - 1) / width of the message per collective. The
 * time is that per-GPU volume divided by the bandwidth chosen for
 * `membersForBandwidth`; utilization is the same volume relative to what the
 * link could carry over the whole step, capped to 0-100.
 *
 * Groups of one GPU, or zero collectives / empty messages, cost nothing.
 */
const getRingCommStats = ({
  groupCount,
  groupWidth,
  messageBytes,
  collectiveCount,
  membersForBandwidth,
  cluster,
  totalStepTimeMs,
}: {
  groupCount: number
  groupWidth: number
  messageBytes: number
  collectiveCount: number
  membersForBandwidth: PlacementEntry[]
  cluster: ClusterConfig
  totalStepTimeMs: number
}): RingCommStats => {
  const hasTraffic = groupWidth > 1 && collectiveCount > 0 && messageBytes > 0
  if (!hasTraffic) {
    return {
      volumeBytesPerGpu: 0,
      totalVolumeBytes: 0,
      timePerStepMs: 0,
      linkUtilizationPercent: 0,
      usesInterNode: false,
    }
  }

  const link = getMaxBandwidthForCollective(membersForBandwidth, cluster)

  const bytesPerCollective = (2 * (groupWidth - 1) * messageBytes) / groupWidth
  const volumeBytesPerGpu = bytesPerCollective * collectiveCount
  const totalVolumeBytes = volumeBytesPerGpu * groupWidth * groupCount
  const volumeGBPerGpu = bytesToGB(volumeBytesPerGpu)

  const timePerStepMs = (volumeGBPerGpu / link.bandwidthGBs) * 1000

  let linkUtilizationPercent = 0
  if (totalStepTimeMs > 0) {
    const stepSeconds = totalStepTimeMs / 1000
    const rawPercent = (volumeGBPerGpu / (link.bandwidthGBs * stepSeconds)) * 100
    linkUtilizationPercent = clamp(rawPercent, 0, 100)
  }

  return {
    volumeBytesPerGpu,
    totalVolumeBytes,
    timePerStepMs,
    linkUtilizationPercent,
    usesInterNode: link.usesInterNode,
  }
}
|
| 846 |
+
|
| 847 |
+
/**
 * Analyzes a model/training/parallelism configuration against a cluster.
 *
 * The configuration is validated first, in a fixed order; the first failing
 * check returns an infeasible, zeroed analysis carrying a human-readable
 * reason. For a valid configuration the function derives, in order:
 *   1. per-stage memory and the per-GPU placement map,
 *   2. compute time and pipeline-bubble time per step,
 *   3. communication statistics for TP, CP, FSDP, EP, PP and DP traffic,
 *   4. throughput (tokens/s) and MFU,
 *   5. a list of links for at most 12 evenly sampled data-parallel replicas.
 *
 * A valid configuration is reported as feasible only when the most
 * memory-hungry pipeline stage fits within the GPU's HBM capacity.
 *
 * @param model - Model architecture description.
 * @param training - Batch, sequence-length, precision and checkpointing settings.
 * @param cluster - Node count, GPUs per node, GPU type and link bandwidths.
 * @param parallelism - Requested TP/PP/CP/EP degrees and FSDP/ZeRO settings.
 * @returns The complete analysis; numeric outputs are rounded with `round2`
 *   unless they are counts or raw byte sizes.
 */
export function analyzeCluster(
  model: ModelConfig,
  training: TrainingConfig,
  cluster: ClusterConfig,
  parallelism: ParallelismConfig,
): ClusterAnalysis {
  const totalGPUs = cluster.gpusPerNode * cluster.numNodes
  const derivedParallelism = getDerivedParallelism(cluster, parallelism)
  const globalBatchSizeTokens =
    training.microBatchSize *
    training.seqLength *
    training.gradAccumSteps *
    (derivedParallelism?.dp ?? 0)

  // Placeholder GPU map used by infeasible results: every GPU is present but unassigned.
  const emptyGpuMap = Array.from({ length: totalGPUs }, (_, globalGPUIndex) => ({
    globalGPUIndex,
    nodeIndex: Math.floor(globalGPUIndex / cluster.gpusPerNode),
    localGPUIndex: globalGPUIndex % cluster.gpusPerNode,
    tpGroup: -1,
    tpLane: -1,
    ppStage: -1,
    cpShard: -1,
    epLane: -1,
    dpReplica: -1,
    replicaGroup: -1,
    fsdpRank: -1,
    memoryUsedGB: 0,
    memoryCapacityGB: cluster.gpuType.hbmCapacityGB,
    isActive: false,
  }))

  // Builds the zeroed analysis returned when validation rejects the configuration.
  const infeasible = (infeasibilityReason: string): ClusterAnalysis => ({
    feasible: false,
    infeasibilityReason,
    totalParams: 0,
    activeParamsPerToken: 0,
    globalBatchSizeTokens,
    totalGPUs,
    derivedParallelism: {
      dp: derivedParallelism?.dp ?? 0,
      replicaGroups: derivedParallelism?.replicaGroups ?? 0,
      fsdpShardGroupSize: parallelism.fsdpShardGroupSize,
      fsdpGroupSize: derivedParallelism?.fsdpGroupSize ?? 0,
      ep: parallelism.ep,
    },
    memoryBreakdown: {
      parametersGB: 0,
      optimizerStatesGB: 0,
      gradientsGB: 0,
      activationsGB: 0,
      totalGB: 0,
      hbmCapacityGB: cluster.gpuType.hbmCapacityGB,
      utilizationPercent: 0,
    },
    pipelineStages: [],
    communication: {
      tp: {
        allReducesPerLayer: TP_ALL_REDUCES_PER_LAYER,
        messageSizeBytes: 0,
        totalVolumePerStepGB: 0,
        timePerStepMs: 0,
        linkUtilizationPercent: 0,
      },
      pp: {
        activationMessageSizeBytes: 0,
        numP2PTransfersPerStep: 0,
        totalVolumePerStepGB: 0,
        timePerStepMs: 0,
        usesInterNode: false,
      },
      cp: {
        collectivesPerLayer: CP_COLLECTIVES_PER_LAYER,
        messageSizeBytes: 0,
        totalVolumePerStepGB: 0,
        timePerStepMs: 0,
        linkUtilizationPercent: 0,
        usesInterNode: false,
      },
      fsdp: {
        collectivesPerLayer: FSDP_COLLECTIVES_PER_LAYER,
        messageSizeBytes: 0,
        totalVolumePerStepGB: 0,
        timePerStepMs: 0,
        linkUtilizationPercent: 0,
        usesInterNode: false,
      },
      ep: {
        allToAllsPerLayer: EP_ALL_TO_ALLS_PER_LAYER,
        messageSizeBytes: 0,
        totalVolumePerStepGB: 0,
        timePerStepMs: 0,
        linkUtilizationPercent: 0,
        usesInterNode: false,
      },
      dp: {
        gradientVolumePerGPU_GB: 0,
        allReduceTimeMs: 0,
        canOverlapWithBackward: false,
        linkUtilizationPercent: 0,
      },
    },
    throughput: {
      computeTimePerStepMs: 0,
      communicationTimePerStepMs: 0,
      pipelineBubbleFraction: 0,
      pipelineBubbleTimeMs: 0,
      totalStepTimeMs: 0,
      tokensPerSecond: 0,
      mfu: 0,
    },
    gpuMap: emptyGpuMap,
    links: [],
  })

  // ---- Validation: the first failing check wins. ----

  if (
    training.microBatchSize <= 0 ||
    training.seqLength <= 0 ||
    training.gradAccumSteps <= 0 ||
    parallelism.tp <= 0 ||
    parallelism.pp <= 0 ||
    parallelism.cp <= 0 ||
    parallelism.ep <= 0
  ) {
    return infeasible('Batch sizes and parallelism degrees must all be positive.')
  }

  if (parallelism.tp * parallelism.ep > cluster.gpusPerNode) {
    return infeasible(
      `TP × EP requires ${parallelism.tp * parallelism.ep} GPUs per node, but nodes only have ${cluster.gpusPerNode}.`,
    )
  }

  if (!derivedParallelism) {
    return infeasible(
      `World size ${totalGPUs} must be divisible by TP × PP × CP × EP, and the FSDP shard group must divide the cluster cleanly.`,
    )
  }

  if (model.hiddenDim % model.numHeads !== 0) {
    return infeasible(
      `hiddenDim ${model.hiddenDim} must divide evenly across ${model.numHeads} attention heads.`,
    )
  }

  if (model.numHeads % parallelism.tp !== 0) {
    return infeasible(`TP ${parallelism.tp} must divide the ${model.numHeads} attention heads.`)
  }

  if (model.numKVHeads % parallelism.tp !== 0) {
    return infeasible(
      `TP ${parallelism.tp} should divide the ${model.numKVHeads} KV heads for clean GQA sharding.`,
    )
  }

  if (training.seqLength % parallelism.cp !== 0) {
    return infeasible(`CP ${parallelism.cp} must divide the sequence length ${training.seqLength}.`)
  }

  if (model.architecture === 'moe' && !model.moe) {
    return infeasible('MoE models require expert metadata.')
  }

  if (model.architecture === 'moe' && model.moe && model.moe.numExperts % parallelism.ep !== 0) {
    return infeasible(`EP ${parallelism.ep} must divide the ${model.moe.numExperts} experts.`)
  }

  // ---- Memory and placement ----

  const { modelBreakdown, layerDistribution, stageMemory, stageParameters } = getStageMemoryMap(
    model,
    training,
    parallelism,
    derivedParallelism,
  )
  const placement = buildPlacement(cluster, parallelism, derivedParallelism, totalGPUs)
  const maxStageLayers = Math.max(...layerDistribution.map((stage) => stage.numLayers), 0)

  // Fallback used wherever a stage has no memory entry; it is only ever read.
  const zeroStageMemory = {
    parametersGB: 0,
    optimizerStatesGB: 0,
    gradientsGB: 0,
    activationsGB: 0,
    totalGB: 0,
  }

  const pipelineStages = layerDistribution.map((stage) => {
    const stageMemoryBreakdown = stageMemory.get(stage.stageIndex)
    const stageParameterCount = stageParameters.get(stage.stageIndex)

    return {
      stageIndex: stage.stageIndex,
      layerRange: [stage.startLayer, stage.endLayer] as [number, number],
      numLayers: stage.numLayers,
      // Per-GPU stage memory scaled by the number of GPUs that hold this stage.
      memoryGB: round2(
        (stageMemoryBreakdown?.totalGB ?? 0) *
          parallelism.tp *
          parallelism.cp *
          parallelism.ep *
          derivedParallelism.dp,
      ),
      hasEmbedding: stageParameterCount?.hasEmbedding ?? false,
      hasOutputHead: stageParameterCount?.hasOutputHead ?? false,
    }
  })

  // The stage with the largest per-GPU footprint decides feasibility; ties keep the earlier stage.
  const worstStageIndex = pipelineStages.reduce((worstIndex, stage) => {
    const worstStageMemory = stageMemory.get(worstIndex)?.totalGB ?? 0
    const candidateStageMemory = stageMemory.get(stage.stageIndex)?.totalGB ?? 0
    return candidateStageMemory > worstStageMemory ? stage.stageIndex : worstIndex
  }, 0)

  const worstStageMemory = stageMemory.get(worstStageIndex) ?? zeroStageMemory

  const pipelineBubbleFraction =
    parallelism.pp <= 1
      ? 0
      : (parallelism.pp - 1) / (training.gradAccumSteps + parallelism.pp - 1)
  // Number of trailing pipeline stages flagged as idle in the GPU map, proportional to the bubble.
  const boundaryStageCount = Math.min(
    parallelism.pp,
    Math.max(0, Math.round(pipelineBubbleFraction * parallelism.pp)),
  )

  const gpuMap = placement.map((entry) => {
    const stageMemoryBreakdown =
      (entry.ppStage >= 0 ? stageMemory.get(entry.ppStage) : undefined) ?? zeroStageMemory
    const bubbleIdle = entry.ppStage >= parallelism.pp - boundaryStageCount && entry.ppStage >= 0

    return {
      globalGPUIndex: entry.globalGPUIndex,
      nodeIndex: entry.nodeIndex,
      localGPUIndex: entry.localGPUIndex,
      tpGroup: entry.tpGroup,
      tpLane: entry.tpLane,
      ppStage: entry.ppStage,
      cpShard: entry.cpShard,
      epLane: entry.epLane,
      dpReplica: entry.dpReplica,
      replicaGroup: entry.replicaGroup,
      fsdpRank: entry.fsdpRank,
      // Memory is reported for every placed GPU, even those flagged idle by the bubble.
      memoryUsedGB: round2(entry.isActive ? stageMemoryBreakdown.totalGB : 0),
      memoryCapacityGB: cluster.gpuType.hbmCapacityGB,
      isActive: entry.isActive && !bubbleIdle,
    }
  })

  // ---- Compute time ----

  const activationBytes = getActivationBytes(training.precision)
  const shardedSequenceLength = training.seqLength / parallelism.cp
  const tokensPerMicroBatchShard = training.microBatchSize * shardedSequenceLength
  const collectiveMessageBytes =
    tokensPerMicroBatchShard * model.hiddenDim * activationBytes

  const attentionComputeMultiplier = 0.65 + 0.35 * getAttentionMultiplier(model, training.seqLength)
  const activationCheckpointComputeMultiplier = training.activationCheckpointing ? 1.2 : 1
  const totalFlopsPerStep =
    6 *
    modelBreakdown.activeParamsPerToken *
    training.microBatchSize *
    training.seqLength *
    training.gradAccumSteps *
    derivedParallelism.dp *
    attentionComputeMultiplier *
    activationCheckpointComputeMultiplier
  const launchedGPUs = Math.max(totalGPUs, 1)
  const flopsPerGpuPerStep = totalFlopsPerStep / launchedGPUs
  const peakTFLOPs = getPeakTFLOPsForPrecision(cluster.gpuType, training.precision)
  const sustainedTFLOPs = peakTFLOPs * getSustainedComputeEfficiency(training)
  const computeTimePerStepMs = (flopsPerGpuPerStep / (sustainedTFLOPs * 1e12)) * 1000
  const pipelineBubbleTimeMs =
    pipelineBubbleFraction >= 1
      ? 0
      : (computeTimePerStepMs * pipelineBubbleFraction) / (1 - pipelineBubbleFraction)

  // Step time before communication; used as the denominator for ring-collective link utilization.
  const tentativeTotalStepTimeMs = computeTimePerStepMs + pipelineBubbleTimeMs

  // ---- Communication: TP / CP / FSDP ring collectives ----
  // Each collective's bandwidth is measured on one representative group (replica 0, stage 0, ...).

  const tpMembers = placement.filter(
    (entry) =>
      entry.dpReplica === 0 &&
      entry.ppStage === 0 &&
      entry.cpShard === 0 &&
      entry.epLane === 0 &&
      entry.tpLane >= 0,
  )
  const tpStats = getRingCommStats({
    groupCount: parallelism.pp * parallelism.cp * parallelism.ep * derivedParallelism.dp,
    groupWidth: parallelism.tp,
    messageBytes: collectiveMessageBytes,
    collectiveCount: TP_ALL_REDUCES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
    membersForBandwidth: tpMembers,
    cluster,
    totalStepTimeMs: tentativeTotalStepTimeMs,
  })

  const cpMembers = placement.filter(
    (entry) =>
      entry.dpReplica === 0 &&
      entry.ppStage === 0 &&
      entry.epLane === 0 &&
      entry.tpLane === 0 &&
      entry.cpShard >= 0,
  )
  const cpStats = getRingCommStats({
    groupCount: parallelism.pp * derivedParallelism.dp * parallelism.tp * parallelism.ep,
    groupWidth: parallelism.cp,
    messageBytes: collectiveMessageBytes,
    collectiveCount: CP_COLLECTIVES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
    membersForBandwidth: cpMembers,
    cluster,
    totalStepTimeMs: tentativeTotalStepTimeMs,
  })

  const averageSharedLayerParams =
    model.numLayers > 0
      ? (modelBreakdown.denseLayerCount * modelBreakdown.sharedDenseLayerParams +
          modelBreakdown.moeLayerCount * modelBreakdown.sharedMoeLayerParams) /
        model.numLayers
      : 0
  // FSDP traffic is only modeled for ZeRO stage >= 3 with more than one shard rank.
  const fsdpMessageBytes =
    parallelism.zeroStage >= 3 && derivedParallelism.fsdpDataParallelDegree > 1
      ? (averageSharedLayerParams / parallelism.tp / derivedParallelism.fsdpDataParallelDegree) *
        getParameterBytes(training.precision)
      : 0
  const fsdpMembers = placement.filter(
    (entry) =>
      entry.replicaGroup === 0 &&
      entry.ppStage === 0 &&
      entry.cpShard === 0 &&
      entry.epLane === 0 &&
      entry.tpLane === 0,
  )
  const fsdpStats = getRingCommStats({
    groupCount:
      derivedParallelism.replicaGroups *
      parallelism.pp *
      parallelism.cp *
      parallelism.ep *
      parallelism.tp,
    groupWidth: derivedParallelism.fsdpDataParallelDegree,
    messageBytes: fsdpMessageBytes,
    collectiveCount: FSDP_COLLECTIVES_PER_LAYER * maxStageLayers * training.gradAccumSteps,
    membersForBandwidth: fsdpMembers,
    cluster,
    totalStepTimeMs: tentativeTotalStepTimeMs,
  })

  // ---- Communication: EP all-to-all ----

  const epMembers = placement.filter(
    (entry) =>
      entry.dpReplica === 0 &&
      entry.ppStage === 0 &&
      entry.cpShard === 0 &&
      entry.tpLane === 0 &&
      entry.epLane >= 0,
  )
  const moeLayerCount = modelBreakdown.moeLayerCount
  const epMessageBytes =
    model.architecture === 'moe' && model.moe
      ? tokensPerMicroBatchShard *
        model.hiddenDim *
        activationBytes *
        model.moe.expertsPerToken
      : 0
  const epTransferCount = EP_ALL_TO_ALLS_PER_LAYER * moeLayerCount * training.gradAccumSteps
  const epStats = (() => {
    // No expert traffic without expert parallelism, MoE layers, or routed tokens.
    if (parallelism.ep <= 1 || epTransferCount <= 0 || epMessageBytes <= 0) {
      return {
        totalVolumeBytes: 0,
        timePerStepMs: 0,
        linkUtilizationPercent: 0,
        usesInterNode: false,
      }
    }

    const { bandwidthGBs, usesInterNode } = getMaxBandwidthForCollective(epMembers, cluster)
    const volumeBytesPerGpu = epMessageBytes * epTransferCount * 2
    const totalVolumeBytes =
      volumeBytesPerGpu *
      parallelism.ep *
      parallelism.pp *
      parallelism.cp *
      parallelism.tp *
      derivedParallelism.dp
    const timePerStepMs = (bytesToGB(volumeBytesPerGpu) / bandwidthGBs) * 1000
    const linkUtilizationPercent =
      tentativeTotalStepTimeMs > 0
        ? clamp(
            (bytesToGB(volumeBytesPerGpu) /
              (bandwidthGBs * (tentativeTotalStepTimeMs / 1000))) *
              100,
            0,
            100,
          )
        : 0

    return {
      totalVolumeBytes,
      timePerStepMs,
      linkUtilizationPercent,
      usesInterNode,
    }
  })()

  // ---- Communication: PP point-to-point ----
  // NOTE(review): ppTimePerStepMs is accumulated over every DP replica and CP shard, although
  // those transfers presumably run concurrently — confirm whether a per-replica figure was intended.

  let ppTotalVolumeBytes = 0
  let ppTimePerStepMs = 0
  let ppUsesInterNode = false

  for (let dpReplica = 0; dpReplica < derivedParallelism.dp; dpReplica += 1) {
    for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
      for (let stageIndex = 0; stageIndex < parallelism.pp - 1; stageIndex += 1) {
        const source = getPlacementEntry(placement, {
          dpReplica,
          ppStage: stageIndex,
          cpShard,
          epLane: 0,
          tpLane: 0,
        })
        const target = getPlacementEntry(placement, {
          dpReplica,
          ppStage: stageIndex + 1,
          cpShard,
          epLane: 0,
          tpLane: 0,
        })

        if (!source || !target) {
          continue
        }

        const usesInterNode = source.nodeIndex !== target.nodeIndex
        const bandwidthGBs = usesInterNode
          ? cluster.interNodeBandwidthGBs
          : cluster.intraNodeBandwidthGBs
        const perLaneBytes = collectiveMessageBytes / parallelism.tp

        ppUsesInterNode ||= usesInterNode
        // Factor 2 covers the forward activation and the backward gradient transfer.
        ppTotalVolumeBytes += collectiveMessageBytes * 2 * training.gradAccumSteps
        ppTimePerStepMs +=
          (bytesToGB(perLaneBytes) / bandwidthGBs) * 1000 * 2 * training.gradAccumSteps
      }
    }
  }

  // ---- Communication: DP gradient all-reduce ----

  const maxStageGradientBytes = Math.max(
    ...Array.from(stageMemory.values()).map((stage) => stage.gradientsGB * BYTES_PER_GB),
    0,
  )
  // With FSDP shard groups, gradients are reduced across replica groups instead of all DP replicas.
  const usesFsdpShardGroups = parallelism.fsdpShardGroupSize > 1
  const dpGroupWidth = usesFsdpShardGroups
    ? derivedParallelism.replicaGroups
    : derivedParallelism.dp
  const dpMembers = placement.filter(
    (entry) =>
      (!usesFsdpShardGroups || entry.fsdpRank === 0) &&
      entry.ppStage === 0 &&
      entry.cpShard === 0 &&
      entry.epLane === 0 &&
      entry.tpLane === 0,
  )
  // Ring all-reduce volume per GPU: 2 * (n - 1) / n of the gradient buffer.
  const gradientCommBytesPerGpu =
    dpGroupWidth > 1
      ? (2 * (dpGroupWidth - 1) * maxStageGradientBytes) / dpGroupWidth
      : 0
  const dpBandwidth = getMaxBandwidthForCollective(dpMembers, cluster)
  const dpTimeMs =
    dpGroupWidth > 1
      ? (bytesToGB(gradientCommBytesPerGpu) / dpBandwidth.bandwidthGBs) * 1000
      : 0
  const canOverlapDp = dpGroupWidth > 1 && (parallelism.pp > 1 || training.gradAccumSteps > 1)
  // When overlap is possible only 35% of the all-reduce time is charged to the step.
  const dpNonOverlappedTimeMs = dpTimeMs * (canOverlapDp ? 0.35 : 1)

  // ---- Throughput ----

  const communicationTimePerStepMs =
    tpStats.timePerStepMs +
    cpStats.timePerStepMs +
    fsdpStats.timePerStepMs +
    epStats.timePerStepMs +
    ppTimePerStepMs +
    dpNonOverlappedTimeMs
  const totalStepTimeMs =
    computeTimePerStepMs + pipelineBubbleTimeMs + communicationTimePerStepMs
  const tokensPerSecond =
    totalStepTimeMs > 0 ? globalBatchSizeTokens / (totalStepTimeMs / 1000) : 0
  // MFU deliberately omits the activation-checkpointing multiplier applied to totalFlopsPerStep.
  const mfu =
    tokensPerSecond > 0
      ? clamp(
          (6 * modelBreakdown.activeParamsPerToken * attentionComputeMultiplier * tokensPerSecond) /
            (launchedGPUs * peakTFLOPs * 1e12),
          0,
          1,
        )
      : 0

  const dpLinkUtilizationPercent =
    dpGroupWidth > 1 && totalStepTimeMs > 0
      ? clamp(
          (bytesToGB(gradientCommBytesPerGpu) /
            (dpBandwidth.bandwidthGBs * (totalStepTimeMs / 1000))) *
            100,
          0,
          100,
        )
      : 0

  const ppPerLaneVolumeGB =
    parallelism.pp > 1
      ? bytesToGB(collectiveMessageBytes / parallelism.tp) * 2 * training.gradAccumSteps
      : 0
  const ppLinkUtilizationPercent =
    parallelism.pp > 1 && totalStepTimeMs > 0
      ? clamp(
          (ppPerLaneVolumeGB /
            ((ppUsesInterNode
              ? cluster.interNodeBandwidthGBs
              : cluster.intraNodeBandwidthGBs) *
              (totalStepTimeMs / 1000))) *
            100,
          0,
          100,
        )
      : 0

  // ---- Links (for at most 12 evenly spaced DP replicas) ----

  const links: ClusterAnalysis['links'] = []
  const visualReplicaSamples = Math.min(derivedParallelism.dp, 12)
  const sampledDpReplicas = Array.from({ length: visualReplicaSamples }, (_, sampleIndex) =>
    Math.floor((sampleIndex * derivedParallelism.dp) / visualReplicaSamples),
  )

  for (const dpReplica of sampledDpReplicas) {
    // Looked up once per sampled replica: the FSDP and DP link builders below both need the
    // replica's group/rank, and repeating this scan inside their filters and loops was quadratic.
    const replicaEntry = placement.find((entry) => entry.dpReplica === dpReplica)
    const sampledReplicaGroup = replicaEntry?.replicaGroup

    for (let ppStage = 0; ppStage < parallelism.pp; ppStage += 1) {
      for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
        for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
          const tpEntries = placement
            .filter(
              (entry) =>
                entry.dpReplica === dpReplica &&
                entry.ppStage === ppStage &&
                entry.cpShard === cpShard &&
                entry.epLane === epLane,
            )
            .sort((left, right) => left.tpLane - right.tpLane)

          // TP ring: each lane links to the next, wrapping around.
          if (parallelism.tp > 1) {
            for (let lane = 0; lane < tpEntries.length; lane += 1) {
              const from = tpEntries[lane]
              const to = tpEntries[(lane + 1) % tpEntries.length]

              links.push({
                fromGPU: from.globalGPUIndex,
                toGPU: to.globalGPUIndex,
                type: 'nvlink',
                trafficType: 'tp',
                volumeGB: round2(bytesToGB(tpStats.volumeBytesPerGpu)),
                utilizationPercent: round2(tpStats.linkUtilizationPercent),
              })
            }
          }

          // PP: each TP lane links to the same lane of the next stage.
          if (ppStage < parallelism.pp - 1) {
            const nextTpEntries = placement
              .filter(
                (entry) =>
                  entry.dpReplica === dpReplica &&
                  entry.ppStage === ppStage + 1 &&
                  entry.cpShard === cpShard &&
                  entry.epLane === epLane,
              )
              .sort((left, right) => left.tpLane - right.tpLane)

            for (let lane = 0; lane < Math.min(tpEntries.length, nextTpEntries.length); lane += 1) {
              const from = tpEntries[lane]
              const to = nextTpEntries[lane]
              links.push({
                fromGPU: from.globalGPUIndex,
                toGPU: to.globalGPUIndex,
                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
                trafficType: 'pp',
                volumeGB: round2(ppPerLaneVolumeGB),
                utilizationPercent: round2(ppLinkUtilizationPercent),
              })
            }
          }
        }
      }

      // CP ring across sequence shards.
      if (parallelism.cp > 1) {
        for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
          for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
            const cpEntries = placement
              .filter(
                (entry) =>
                  entry.dpReplica === dpReplica &&
                  entry.ppStage === ppStage &&
                  entry.epLane === epLane &&
                  entry.tpLane === tpLane,
              )
              .sort((left, right) => left.cpShard - right.cpShard)

            for (let shardIndex = 0; shardIndex < cpEntries.length; shardIndex += 1) {
              const from = cpEntries[shardIndex]
              const to = cpEntries[(shardIndex + 1) % cpEntries.length]
              links.push({
                fromGPU: from.globalGPUIndex,
                toGPU: to.globalGPUIndex,
                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
                trafficType: 'cp',
                volumeGB: round2(bytesToGB(cpStats.volumeBytesPerGpu)),
                utilizationPercent: round2(cpStats.linkUtilizationPercent),
              })
            }
          }
        }
      }

      // EP ring across expert lanes.
      if (parallelism.ep > 1) {
        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
          for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
            const epEntries = placement
              .filter(
                (entry) =>
                  entry.dpReplica === dpReplica &&
                  entry.ppStage === ppStage &&
                  entry.cpShard === cpShard &&
                  entry.tpLane === tpLane,
              )
              .sort((left, right) => left.epLane - right.epLane)

            for (let lane = 0; lane < epEntries.length; lane += 1) {
              const from = epEntries[lane]
              const to = epEntries[(lane + 1) % epEntries.length]
              links.push({
                fromGPU: from.globalGPUIndex,
                toGPU: to.globalGPUIndex,
                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
                trafficType: 'ep',
                volumeGB: round2(
                  epStats.totalVolumeBytes > 0
                    ? bytesToGB(epStats.totalVolumeBytes) /
                        (parallelism.ep *
                          Math.max(parallelism.tp * parallelism.cp * parallelism.pp * derivedParallelism.dp, 1))
                    : 0,
                ),
                utilizationPercent: round2(epStats.linkUtilizationPercent),
              })
            }
          }
        }
      }

      // FSDP ring inside the sampled replica's replica group.
      if (derivedParallelism.fsdpDataParallelDegree > 1) {
        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
          for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
            for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
              const fsdpEntries = placement
                .filter(
                  (entry) =>
                    entry.replicaGroup === sampledReplicaGroup &&
                    entry.ppStage === ppStage &&
                    entry.cpShard === cpShard &&
                    entry.epLane === epLane &&
                    entry.tpLane === tpLane,
                )
                .sort((left, right) => left.fsdpRank - right.fsdpRank)

              for (let rank = 0; rank < fsdpEntries.length; rank += 1) {
                const from = fsdpEntries[rank]
                const to = fsdpEntries[(rank + 1) % fsdpEntries.length]
                links.push({
                  fromGPU: from.globalGPUIndex,
                  toGPU: to.globalGPUIndex,
                  type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
                  trafficType: 'fsdp',
                  volumeGB: round2(bytesToGB(fsdpStats.volumeBytesPerGpu)),
                  utilizationPercent: round2(fsdpStats.linkUtilizationPercent),
                })
              }
            }
          }
        }
      }

      // DP ring: link this replica (or replica group) to the next one, wrapping around.
      // Without a placement entry for the sampled replica there is nothing to link.
      if (dpGroupWidth > 1 && replicaEntry) {
        for (let cpShard = 0; cpShard < parallelism.cp; cpShard += 1) {
          for (let epLane = 0; epLane < parallelism.ep; epLane += 1) {
            for (let tpLane = 0; tpLane < parallelism.tp; tpLane += 1) {
              const from = getPlacementEntry(placement, {
                replicaGroup: usesFsdpShardGroups ? replicaEntry.replicaGroup : undefined,
                fsdpRank: usesFsdpShardGroups ? replicaEntry.fsdpRank : undefined,
                dpReplica: usesFsdpShardGroups ? undefined : dpReplica,
                ppStage,
                cpShard,
                epLane,
                tpLane,
              })
              const to = getPlacementEntry(placement, {
                replicaGroup: usesFsdpShardGroups
                  ? (replicaEntry.replicaGroup + 1) % derivedParallelism.replicaGroups
                  : undefined,
                fsdpRank: usesFsdpShardGroups ? replicaEntry.fsdpRank : undefined,
                dpReplica: usesFsdpShardGroups
                  ? undefined
                  : (dpReplica + 1) % derivedParallelism.dp,
                ppStage,
                cpShard,
                epLane,
                tpLane,
              })

              if (!from || !to) {
                continue
              }

              links.push({
                fromGPU: from.globalGPUIndex,
                toGPU: to.globalGPUIndex,
                type: from.nodeIndex === to.nodeIndex ? 'nvlink' : 'infiniband',
                trafficType: 'dp',
                volumeGB: round2(bytesToGB(gradientCommBytesPerGpu)),
                utilizationPercent: round2(dpLinkUtilizationPercent),
              })
            }
          }
        }
      }
    }
  }

  // ---- Result ----

  const feasible = worstStageMemory.totalGB <= cluster.gpuType.hbmCapacityGB
  const infeasibilityReason = feasible
    ? undefined
    : `Stage ${worstStageIndex} uses ${round2(worstStageMemory.totalGB)} GB per GPU, exceeding ${cluster.gpuType.hbmCapacityGB} GB of HBM.`

  return {
    feasible,
    infeasibilityReason,
    totalParams: Math.round(modelBreakdown.totalParams),
    activeParamsPerToken: Math.round(modelBreakdown.activeParamsPerToken),
    globalBatchSizeTokens,
    totalGPUs,
    derivedParallelism: {
      dp: derivedParallelism.dp,
      replicaGroups: derivedParallelism.replicaGroups,
      fsdpShardGroupSize: parallelism.fsdpShardGroupSize,
      fsdpGroupSize: derivedParallelism.fsdpGroupSize,
      ep: parallelism.ep,
    },
    memoryBreakdown: {
      parametersGB: round2(worstStageMemory.parametersGB),
      optimizerStatesGB: round2(worstStageMemory.optimizerStatesGB),
      gradientsGB: round2(worstStageMemory.gradientsGB),
      activationsGB: round2(worstStageMemory.activationsGB),
      totalGB: round2(worstStageMemory.totalGB),
      hbmCapacityGB: cluster.gpuType.hbmCapacityGB,
      utilizationPercent: round2(
        (worstStageMemory.totalGB / cluster.gpuType.hbmCapacityGB) * 100,
      ),
    },
    pipelineStages,
    communication: {
      tp: {
        allReducesPerLayer: TP_ALL_REDUCES_PER_LAYER,
        messageSizeBytes: collectiveMessageBytes,
        totalVolumePerStepGB: round2(bytesToGB(tpStats.totalVolumeBytes)),
        timePerStepMs: round2(tpStats.timePerStepMs),
        linkUtilizationPercent: round2(tpStats.linkUtilizationPercent),
      },
      pp: {
        activationMessageSizeBytes: collectiveMessageBytes,
        numP2PTransfersPerStep:
          parallelism.pp > 1
            ? 2 *
              (parallelism.pp - 1) *
              training.gradAccumSteps *
              parallelism.cp *
              parallelism.tp *
              derivedParallelism.dp
            : 0,
        totalVolumePerStepGB: round2(bytesToGB(ppTotalVolumeBytes)),
        timePerStepMs: round2(ppTimePerStepMs),
        usesInterNode: ppUsesInterNode,
      },
      cp: {
        collectivesPerLayer: CP_COLLECTIVES_PER_LAYER,
        messageSizeBytes: collectiveMessageBytes,
        totalVolumePerStepGB: round2(bytesToGB(cpStats.totalVolumeBytes)),
        timePerStepMs: round2(cpStats.timePerStepMs),
        linkUtilizationPercent: round2(cpStats.linkUtilizationPercent),
        usesInterNode: cpStats.usesInterNode,
      },
      fsdp: {
        collectivesPerLayer: FSDP_COLLECTIVES_PER_LAYER,
        messageSizeBytes: round2(fsdpMessageBytes),
        totalVolumePerStepGB: round2(bytesToGB(fsdpStats.totalVolumeBytes)),
        timePerStepMs: round2(fsdpStats.timePerStepMs),
        linkUtilizationPercent: round2(fsdpStats.linkUtilizationPercent),
        usesInterNode: fsdpStats.usesInterNode,
      },
      ep: {
        allToAllsPerLayer: EP_ALL_TO_ALLS_PER_LAYER,
        messageSizeBytes: round2(epMessageBytes),
        totalVolumePerStepGB: round2(bytesToGB(epStats.totalVolumeBytes)),
        timePerStepMs: round2(epStats.timePerStepMs),
        linkUtilizationPercent: round2(epStats.linkUtilizationPercent),
        usesInterNode: epStats.usesInterNode,
      },
      dp: {
        gradientVolumePerGPU_GB: round2(bytesToGB(gradientCommBytesPerGpu)),
        allReduceTimeMs: round2(dpTimeMs),
        canOverlapWithBackward: canOverlapDp,
        linkUtilizationPercent: round2(dpLinkUtilizationPercent),
      },
    },
    throughput: {
      computeTimePerStepMs: round2(computeTimePerStepMs),
      communicationTimePerStepMs: round2(communicationTimePerStepMs),
      pipelineBubbleFraction: round2(pipelineBubbleFraction),
      pipelineBubbleTimeMs: round2(pipelineBubbleTimeMs),
      totalStepTimeMs: round2(totalStepTimeMs),
      tokensPerSecond: round2(tokensPerSecond),
      mfu: round2(mfu),
    },
    gpuMap,
    links,
  }
}
|
| 1711 |
+
|
| 1712 |
+
export const llama7B = (): ModelConfig => ({
|
| 1713 |
+
architecture: 'dense',
|
| 1714 |
+
hiddenDim: 4096,
|
| 1715 |
+
numLayers: 32,
|
| 1716 |
+
numHeads: 32,
|
| 1717 |
+
numKVHeads: 32,
|
| 1718 |
+
vocabSize: 32000,
|
| 1719 |
+
intermediateSize: 11008,
|
| 1720 |
+
tiedEmbeddings: false,
|
| 1721 |
+
attentionProfile: {
|
| 1722 |
+
type: 'full',
|
| 1723 |
+
},
|
| 1724 |
+
})
|
| 1725 |
+
|
| 1726 |
+
export const llama70B = (): ModelConfig => ({
|
| 1727 |
+
architecture: 'dense',
|
| 1728 |
+
hiddenDim: 8192,
|
| 1729 |
+
numLayers: 80,
|
| 1730 |
+
numHeads: 64,
|
| 1731 |
+
numKVHeads: 8,
|
| 1732 |
+
vocabSize: 32000,
|
| 1733 |
+
intermediateSize: 28672,
|
| 1734 |
+
tiedEmbeddings: false,
|
| 1735 |
+
attentionProfile: {
|
| 1736 |
+
type: 'full',
|
| 1737 |
+
},
|
| 1738 |
+
})
|
| 1739 |
+
|
| 1740 |
+
export const llama405B = (): ModelConfig => ({
|
| 1741 |
+
architecture: 'dense',
|
| 1742 |
+
hiddenDim: 16384,
|
| 1743 |
+
numLayers: 126,
|
| 1744 |
+
numHeads: 128,
|
| 1745 |
+
numKVHeads: 8,
|
| 1746 |
+
vocabSize: 128256,
|
| 1747 |
+
intermediateSize: 53248,
|
| 1748 |
+
tiedEmbeddings: false,
|
| 1749 |
+
attentionProfile: {
|
| 1750 |
+
type: 'full',
|
| 1751 |
+
},
|
| 1752 |
+
})
|
| 1753 |
+
|
| 1754 |
+
export const olmo3_32B = (): ModelConfig => ({
|
| 1755 |
+
architecture: 'dense',
|
| 1756 |
+
hiddenDim: 5120,
|
| 1757 |
+
numLayers: 64,
|
| 1758 |
+
numHeads: 40,
|
| 1759 |
+
numKVHeads: 8,
|
| 1760 |
+
vocabSize: 100278,
|
| 1761 |
+
intermediateSize: 27648,
|
| 1762 |
+
tiedEmbeddings: false,
|
| 1763 |
+
attentionProfile: {
|
| 1764 |
+
type: 'hybrid',
|
| 1765 |
+
slidingWindowSize: 4096,
|
| 1766 |
+
globalAttentionFraction: 0.25,
|
| 1767 |
+
},
|
| 1768 |
+
})
|
| 1769 |
+
|
| 1770 |
+
export const llama31_405B = (): ModelConfig => ({
|
| 1771 |
+
architecture: 'dense',
|
| 1772 |
+
hiddenDim: 16384,
|
| 1773 |
+
numLayers: 126,
|
| 1774 |
+
numHeads: 128,
|
| 1775 |
+
numKVHeads: 8,
|
| 1776 |
+
vocabSize: 128256,
|
| 1777 |
+
intermediateSize: 53248,
|
| 1778 |
+
tiedEmbeddings: false,
|
| 1779 |
+
attentionProfile: {
|
| 1780 |
+
type: 'full',
|
| 1781 |
+
},
|
| 1782 |
+
})
|
| 1783 |
+
|
| 1784 |
+
export const trinityLarge400B = (): ModelConfig => ({
|
| 1785 |
+
architecture: 'moe',
|
| 1786 |
+
hiddenDim: 3072,
|
| 1787 |
+
numLayers: 60,
|
| 1788 |
+
numHeads: 48,
|
| 1789 |
+
numKVHeads: 8,
|
| 1790 |
+
vocabSize: 200192,
|
| 1791 |
+
intermediateSize: 12288,
|
| 1792 |
+
tiedEmbeddings: false,
|
| 1793 |
+
attentionProfile: {
|
| 1794 |
+
type: 'hybrid',
|
| 1795 |
+
slidingWindowSize: 4096,
|
| 1796 |
+
globalAttentionEveryN: 4,
|
| 1797 |
+
},
|
| 1798 |
+
moe: {
|
| 1799 |
+
numExperts: 256,
|
| 1800 |
+
expertsPerToken: 4,
|
| 1801 |
+
numDenseLayers: 6,
|
| 1802 |
+
expertIntermediateSize: 3072,
|
| 1803 |
+
activeParamsPerToken: 13_000_000_000,
|
| 1804 |
+
},
|
| 1805 |
+
})
|
| 1806 |
+
|
| 1807 |
+
export const a100_80gb = (): GPUSpec => ({
|
| 1808 |
+
name: 'A100 80GB',
|
| 1809 |
+
hbmCapacityGB: 80,
|
| 1810 |
+
peakTFLOPsBF16: 312,
|
| 1811 |
+
memBandwidthTBs: 2,
|
| 1812 |
+
})
|
| 1813 |
+
|
| 1814 |
+
export const h100_sxm = (): GPUSpec => ({
|
| 1815 |
+
name: 'H100 SXM',
|
| 1816 |
+
hbmCapacityGB: 80,
|
| 1817 |
+
peakTFLOPsBF16: 989,
|
| 1818 |
+
memBandwidthTBs: 3.35,
|
| 1819 |
+
})
|
| 1820 |
+
|
| 1821 |
+
export const b300 = (): GPUSpec => ({
|
| 1822 |
+
name: 'B300',
|
| 1823 |
+
hbmCapacityGB: 192,
|
| 1824 |
+
peakTFLOPsBF16: 2250,
|
| 1825 |
+
memBandwidthTBs: 8,
|
| 1826 |
+
})
|
| 1827 |
+
|
| 1828 |
+
export const gb200 = (): GPUSpec => ({
|
| 1829 |
+
name: 'GB200',
|
| 1830 |
+
hbmCapacityGB: 192,
|
| 1831 |
+
peakTFLOPsBF16: 2250,
|
| 1832 |
+
memBandwidthTBs: 8,
|
| 1833 |
+
})
|
| 1834 |
+
|
| 1835 |
+
/**
 * Builds the configuration for a single node holding eight GPUs.
 *
 * Both bandwidth fields are copied from `getDefaultFabric(gpuType)`.
 *
 * @param gpuType GPU spec stored on the cluster; defaults to `a100_80gb()`.
 * @returns A `ClusterConfig` with `numNodes` 1 and `nodesPerRack` 1.
 */
export const singleNode8GPU = (gpuType: GPUSpec = a100_80gb()): ClusterConfig => {
  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)

  const cluster: ClusterConfig = {
    gpuType,
    gpusPerNode: 8,
    numNodes: 1,
    intraNodeBandwidthGBs,
    interNodeBandwidthGBs,
    nodesPerRack: 1,
    rackLabel: 'node',
    nodeLabel: 'GPU host',
    podLabel: 'node',
  }

  return cluster
}
|
| 1850 |
+
|
| 1851 |
+
/**
 * Builds the configuration for an eight-node cluster with eight GPUs per node
 * (64 GPUs), grouped four nodes to a rack.
 *
 * Both bandwidth fields are copied from `getDefaultFabric(gpuType)`.
 *
 * @param gpuType GPU spec stored on the cluster; defaults to `h100_sxm()`.
 * @returns The assembled `ClusterConfig`.
 */
export const cluster64GPU = (gpuType: GPUSpec = h100_sxm()): ClusterConfig => {
  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)

  const cluster: ClusterConfig = {
    gpuType,
    gpusPerNode: 8,
    numNodes: 8,
    intraNodeBandwidthGBs,
    interNodeBandwidthGBs,
    nodesPerRack: 4,
    rackLabel: 'rack',
    nodeLabel: 'GPU host',
    podLabel: 'rack',
  }

  return cluster
}
|
| 1866 |
+
|
| 1867 |
+
/**
 * Builds the configuration for a 576-GPU GB200 cluster: 72 nodes of eight
 * GPUs, nine nodes per rack.
 *
 * Both bandwidth fields are copied from `getDefaultFabric` for the GB200 spec.
 *
 * @returns The assembled `ClusterConfig`.
 */
export const frontier576GPU = (): ClusterConfig => {
  const gpuType = gb200()
  const { intraNodeBandwidthGBs, interNodeBandwidthGBs } = getDefaultFabric(gpuType)

  const cluster: ClusterConfig = {
    gpuType,
    gpusPerNode: 8,
    numNodes: 72,
    intraNodeBandwidthGBs,
    interNodeBandwidthGBs,
    nodesPerRack: 9,
    rackLabel: 'NVL72 rack',
    nodeLabel: 'compute tray',
    podLabel: 'rack',
  }

  return cluster
}
|
src/lib/viewOptions.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import {
|
| 2 |
+
getScenarioWorkbenchConfig,
|
| 3 |
+
type WorkbenchConfig,
|
| 4 |
+
type WorkbenchScenarioId,
|
| 5 |
+
} from './workbench'
|
| 6 |
+
|
| 7 |
+
export type ViewOptions = {
|
| 8 |
+
debug: boolean
|
| 9 |
+
snapshot: boolean
|
| 10 |
+
scenario: WorkbenchScenarioId
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
const SCENARIOS = new Set<WorkbenchScenarioId>([
|
| 14 |
+
'default',
|
| 15 |
+
'olmo-pretraining',
|
| 16 |
+
'olmo-long-context',
|
| 17 |
+
'llama-pretraining',
|
| 18 |
+
'llama-long-context',
|
| 19 |
+
'trinity-pretraining',
|
| 20 |
+
'trinity-long-context',
|
| 21 |
+
'infeasible-memory',
|
| 22 |
+
])
|
| 23 |
+
|
| 24 |
+
// Spellings (compared case-insensitively) that switch a boolean query flag on.
const truthyValues = new Set(['1', 'true', 'yes', 'on'])

/**
 * Interprets a raw query-string value as a boolean flag.
 *
 * @param value The parameter value, or `null` when the parameter is absent.
 * @returns `true` only when the lower-cased value is one of `truthyValues`.
 */
function isTruthy(value: string | null) {
  return value !== null && truthyValues.has(value.toLowerCase())
}
|
| 33 |
+
|
| 34 |
+
/**
 * Parses the view options out of a URL query string.
 *
 * @param search Query string to parse; defaults to `window.location.search`.
 * @returns The `debug` and `snapshot` flags plus the requested scenario.
 *   A missing or unrecognised `scenario` parameter yields `'default'`.
 */
export function getViewOptions(search = window.location.search): ViewOptions {
  const params = new URLSearchParams(search)
  const requested = params.get('scenario') as WorkbenchScenarioId
  const debug = isTruthy(params.get('debug'))
  const snapshot = isTruthy(params.get('snapshot'))

  if (SCENARIOS.has(requested)) {
    return { debug, snapshot, scenario: requested }
  }

  return { debug, snapshot, scenario: 'default' }
}
|
| 47 |
+
|
| 48 |
+
/**
 * Returns the workbench configuration for a scenario.
 *
 * Delegates to `getScenarioWorkbenchConfig` from `./workbench`.
 *
 * @param scenario Scenario identifier to look up.
 * @returns The configuration produced for that scenario.
 */
export function getScenarioConfig(scenario: WorkbenchScenarioId): WorkbenchConfig {
  const config: WorkbenchConfig = getScenarioWorkbenchConfig(scenario)
  return config
}
|
src/lib/workbench.ts
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import {
|
| 2 |
+
a100_80gb,
|
| 3 |
+
b300,
|
| 4 |
+
gb200,
|
| 5 |
+
h100_sxm,
|
| 6 |
+
llama31_405B,
|
| 7 |
+
olmo3_32B,
|
| 8 |
+
trinityLarge400B,
|
| 9 |
+
type ClusterConfig,
|
| 10 |
+
type GPUSpec,
|
| 11 |
+
type ModelConfig,
|
| 12 |
+
type ParallelismConfig,
|
| 13 |
+
type TrainingConfig,
|
| 14 |
+
} from './trainingClusterModel'
|
| 15 |
+
|
| 16 |
+
export type ExamplePresetId = 'olmo3-32b' | 'llama31-405b' | 'trinity-large-400b'
|
| 17 |
+
export type ExamplePhaseId = 'pretraining' | 'long-context'
|
| 18 |
+
export type GpuPresetId = 'a100-80gb' | 'h100-sxm' | 'b300' | 'gb200'
|
| 19 |
+
export type WorkbenchScenarioId =
|
| 20 |
+
| 'default'
|
| 21 |
+
| 'olmo-pretraining'
|
| 22 |
+
| 'olmo-long-context'
|
| 23 |
+
| 'llama-pretraining'
|
| 24 |
+
| 'llama-long-context'
|
| 25 |
+
| 'trinity-pretraining'
|
| 26 |
+
| 'trinity-long-context'
|
| 27 |
+
| 'infeasible-memory'
|
| 28 |
+
|
| 29 |
+
export type WorkbenchConfig = {
|
| 30 |
+
examplePresetId: ExamplePresetId
|
| 31 |
+
phaseId: ExamplePhaseId
|
| 32 |
+
customized: boolean
|
| 33 |
+
model: ModelConfig
|
| 34 |
+
training: TrainingConfig
|
| 35 |
+
cluster: ClusterConfig
|
| 36 |
+
parallelism: ParallelismConfig
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
type ExamplePhaseConfig = {
|
| 40 |
+
cluster: ClusterConfig
|
| 41 |
+
training: TrainingConfig
|
| 42 |
+
parallelism: ParallelismConfig
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
type ExamplePreset = {
|
| 46 |
+
label: string
|
| 47 |
+
model: () => ModelConfig
|
| 48 |
+
phases: Record<ExamplePhaseId, ExamplePhaseConfig>
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
const GPU_PRESETS: Record<GpuPresetId, { label: string; spec: () => GPUSpec }> = {
|
| 52 |
+
'a100-80gb': {
|
| 53 |
+
label: 'A100 80GB',
|
| 54 |
+
spec: a100_80gb,
|
| 55 |
+
},
|
| 56 |
+
'h100-sxm': {
|
| 57 |
+
label: 'H100 SXM',
|
| 58 |
+
spec: h100_sxm,
|
| 59 |
+
},
|
| 60 |
+
b300: {
|
| 61 |
+
label: 'B300',
|
| 62 |
+
spec: b300,
|
| 63 |
+
},
|
| 64 |
+
gb200: {
|
| 65 |
+
label: 'GB200',
|
| 66 |
+
spec: gb200,
|
| 67 |
+
},
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
/**
 * Tells whether a GPU spec is field-for-field equal to a preset spec.
 *
 * @param candidate Spec to test.
 * @param preset Preset spec to compare against.
 * @returns `true` when name, HBM capacity, peak BF16 TFLOPs and memory
 *   bandwidth are all strictly equal.
 */
const gpuPresetMatches = (candidate: GPUSpec, preset: GPUSpec) => {
  const comparedFields = ['name', 'hbmCapacityGB', 'peakTFLOPsBF16', 'memBandwidthTBs'] as const

  return comparedFields.every((field) => candidate[field] === preset[field])
}
|
| 75 |
+
|
| 76 |
+
/**
 * Builds an H100 SXM cluster description with eight GPUs per node.
 *
 * Bandwidths are fixed here (`intraNodeBandwidthGBs` 900,
 * `interNodeBandwidthGBs` 50) rather than looked up.
 *
 * @param numNodes Number of nodes in the cluster.
 * @param nodesPerRack Number of nodes grouped into one rack.
 * @returns The assembled `ClusterConfig`.
 */
const h100Cluster = (numNodes: number, nodesPerRack: number): ClusterConfig => {
  const cluster: ClusterConfig = {
    gpuType: h100_sxm(),
    gpusPerNode: 8,
    numNodes,
    intraNodeBandwidthGBs: 900,
    interNodeBandwidthGBs: 50,
    nodesPerRack,
    rackLabel: 'rack',
    nodeLabel: 'GPU host',
    podLabel: 'rack',
  }

  return cluster
}
|
| 87 |
+
|
| 88 |
+
/**
 * Builds a B300 cluster description with eight GPUs per node.
 *
 * Bandwidths are fixed here (`intraNodeBandwidthGBs` 900,
 * `interNodeBandwidthGBs` 50) rather than looked up.
 *
 * @param numNodes Number of nodes in the cluster.
 * @param nodesPerRack Number of nodes grouped into one rack.
 * @returns The assembled `ClusterConfig`.
 */
const b300Cluster = (numNodes: number, nodesPerRack: number): ClusterConfig => {
  const cluster: ClusterConfig = {
    gpuType: b300(),
    gpusPerNode: 8,
    numNodes,
    intraNodeBandwidthGBs: 900,
    interNodeBandwidthGBs: 50,
    nodesPerRack,
    rackLabel: 'rack',
    nodeLabel: 'GPU host',
    podLabel: 'rack',
  }

  return cluster
}
|
| 99 |
+
|
| 100 |
+
export const EXAMPLE_PRESETS: Record<ExamplePresetId, ExamplePreset> = {
|
| 101 |
+
'olmo3-32b': {
|
| 102 |
+
label: 'OLMo 3 32B',
|
| 103 |
+
model: olmo3_32B,
|
| 104 |
+
phases: {
|
| 105 |
+
pretraining: {
|
| 106 |
+
cluster: h100Cluster(128, 16),
|
| 107 |
+
training: {
|
| 108 |
+
microBatchSize: 1,
|
| 109 |
+
seqLength: 8192,
|
| 110 |
+
gradAccumSteps: 1,
|
| 111 |
+
precision: 'bf16',
|
| 112 |
+
activationCheckpointing: true,
|
| 113 |
+
optimizer: 'adamw',
|
| 114 |
+
},
|
| 115 |
+
parallelism: {
|
| 116 |
+
tp: 1,
|
| 117 |
+
pp: 1,
|
| 118 |
+
cp: 1,
|
| 119 |
+
ep: 1,
|
| 120 |
+
distributedOptimizer: true,
|
| 121 |
+
fsdpShardGroupSize: 256,
|
| 122 |
+
zeroStage: 3,
|
| 123 |
+
},
|
| 124 |
+
},
|
| 125 |
+
'long-context': {
|
| 126 |
+
cluster: h100Cluster(32, 8),
|
| 127 |
+
training: {
|
| 128 |
+
microBatchSize: 1,
|
| 129 |
+
seqLength: 65536,
|
| 130 |
+
gradAccumSteps: 1,
|
| 131 |
+
precision: 'bf16',
|
| 132 |
+
activationCheckpointing: true,
|
| 133 |
+
optimizer: 'adamw',
|
| 134 |
+
},
|
| 135 |
+
parallelism: {
|
| 136 |
+
tp: 1,
|
| 137 |
+
pp: 1,
|
| 138 |
+
cp: 8,
|
| 139 |
+
ep: 1,
|
| 140 |
+
distributedOptimizer: true,
|
| 141 |
+
fsdpShardGroupSize: 256,
|
| 142 |
+
zeroStage: 3,
|
| 143 |
+
},
|
| 144 |
+
},
|
| 145 |
+
},
|
| 146 |
+
},
|
| 147 |
+
'llama31-405b': {
|
| 148 |
+
label: 'Llama 3.1 405B',
|
| 149 |
+
model: llama31_405B,
|
| 150 |
+
phases: {
|
| 151 |
+
pretraining: {
|
| 152 |
+
cluster: h100Cluster(2048, 16),
|
| 153 |
+
training: {
|
| 154 |
+
microBatchSize: 1,
|
| 155 |
+
seqLength: 8192,
|
| 156 |
+
gradAccumSteps: 16,
|
| 157 |
+
precision: 'bf16',
|
| 158 |
+
activationCheckpointing: true,
|
| 159 |
+
optimizer: 'adamw',
|
| 160 |
+
},
|
| 161 |
+
parallelism: {
|
| 162 |
+
tp: 8,
|
| 163 |
+
pp: 16,
|
| 164 |
+
cp: 1,
|
| 165 |
+
ep: 1,
|
| 166 |
+
distributedOptimizer: true,
|
| 167 |
+
fsdpShardGroupSize: 0,
|
| 168 |
+
zeroStage: 1,
|
| 169 |
+
},
|
| 170 |
+
},
|
| 171 |
+
'long-context': {
|
| 172 |
+
cluster: h100Cluster(2048, 16),
|
| 173 |
+
training: {
|
| 174 |
+
microBatchSize: 1,
|
| 175 |
+
seqLength: 131072,
|
| 176 |
+
gradAccumSteps: 1,
|
| 177 |
+
precision: 'bf16',
|
| 178 |
+
activationCheckpointing: true,
|
| 179 |
+
optimizer: 'adamw',
|
| 180 |
+
},
|
| 181 |
+
parallelism: {
|
| 182 |
+
tp: 8,
|
| 183 |
+
pp: 16,
|
| 184 |
+
cp: 16,
|
| 185 |
+
ep: 1,
|
| 186 |
+
distributedOptimizer: true,
|
| 187 |
+
fsdpShardGroupSize: 0,
|
| 188 |
+
zeroStage: 1,
|
| 189 |
+
},
|
| 190 |
+
},
|
| 191 |
+
},
|
| 192 |
+
},
|
| 193 |
+
'trinity-large-400b': {
|
| 194 |
+
label: 'Trinity Large 400B',
|
| 195 |
+
model: trinityLarge400B,
|
| 196 |
+
phases: {
|
| 197 |
+
pretraining: {
|
| 198 |
+
cluster: b300Cluster(256, 9),
|
| 199 |
+
training: {
|
| 200 |
+
microBatchSize: 1,
|
| 201 |
+
seqLength: 8192,
|
| 202 |
+
gradAccumSteps: 8,
|
| 203 |
+
precision: 'bf16',
|
| 204 |
+
activationCheckpointing: true,
|
| 205 |
+
optimizer: 'muon',
|
| 206 |
+
},
|
| 207 |
+
parallelism: {
|
| 208 |
+
tp: 1,
|
| 209 |
+
pp: 1,
|
| 210 |
+
cp: 1,
|
| 211 |
+
ep: 8,
|
| 212 |
+
distributedOptimizer: true,
|
| 213 |
+
fsdpShardGroupSize: 128,
|
| 214 |
+
zeroStage: 3,
|
| 215 |
+
},
|
| 216 |
+
},
|
| 217 |
+
'long-context': {
|
| 218 |
+
cluster: b300Cluster(256, 9),
|
| 219 |
+
training: {
|
| 220 |
+
microBatchSize: 1,
|
| 221 |
+
seqLength: 262144,
|
| 222 |
+
gradAccumSteps: 1,
|
| 223 |
+
precision: 'bf16',
|
| 224 |
+
activationCheckpointing: true,
|
| 225 |
+
optimizer: 'muon',
|
| 226 |
+
},
|
| 227 |
+
parallelism: {
|
| 228 |
+
tp: 1,
|
| 229 |
+
pp: 1,
|
| 230 |
+
cp: 4,
|
| 231 |
+
ep: 8,
|
| 232 |
+
distributedOptimizer: true,
|
| 233 |
+
fsdpShardGroupSize: 128,
|
| 234 |
+
zeroStage: 3,
|
| 235 |
+
},
|
| 236 |
+
},
|
| 237 |
+
},
|
| 238 |
+
},
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
/**
 * Creates a fresh, non-customized workbench configuration from an example
 * preset and one of its phases.
 *
 * The training, cluster and parallelism objects are copied out of
 * `EXAMPLE_PRESETS` so callers can edit the result without touching the
 * preset table. The model comes from the preset's factory function.
 *
 * @param examplePresetId Key of the example preset to start from.
 * @param phaseId Phase of that preset to use.
 * @returns A new `WorkbenchConfig` with `customized` set to `false`.
 */
const createWorkbenchConfig = (
  examplePresetId: ExamplePresetId,
  phaseId: ExamplePhaseId,
): WorkbenchConfig => {
  const preset = EXAMPLE_PRESETS[examplePresetId]
  const phase = preset.phases[phaseId]

  return {
    examplePresetId,
    phaseId,
    customized: false,
    model: preset.model(),
    training: { ...phase.training },
    // The spread alone is shallow: copy the nested GPU spec as well, otherwise
    // every config built from this phase would share one `gpuType` object
    // with the module-level preset table.
    cluster: { ...phase.cluster, gpuType: { ...phase.cluster.gpuType } },
    parallelism: { ...phase.parallelism },
  }
}
|
| 258 |
+
|
| 259 |
+
const SCENARIOS: Record<WorkbenchScenarioId, WorkbenchConfig> = {
|
| 260 |
+
default: createWorkbenchConfig('olmo3-32b', 'pretraining'),
|
| 261 |
+
'olmo-pretraining': createWorkbenchConfig('olmo3-32b', 'pretraining'),
|
| 262 |
+
'olmo-long-context': createWorkbenchConfig('olmo3-32b', 'long-context'),
|
| 263 |
+
'llama-pretraining': createWorkbenchConfig('llama31-405b', 'pretraining'),
|
| 264 |
+
'llama-long-context': createWorkbenchConfig('llama31-405b', 'long-context'),
|
| 265 |
+
'trinity-pretraining': createWorkbenchConfig('trinity-large-400b', 'pretraining'),
|
| 266 |
+
'trinity-long-context': createWorkbenchConfig('trinity-large-400b', 'long-context'),
|
| 267 |
+
'infeasible-memory': {
|
| 268 |
+
examplePresetId: 'llama31-405b',
|
| 269 |
+
phaseId: 'pretraining',
|
| 270 |
+
customized: false,
|
| 271 |
+
model: llama31_405B(),
|
| 272 |
+
training: {
|
| 273 |
+
microBatchSize: 1,
|
| 274 |
+
seqLength: 8192,
|
| 275 |
+
gradAccumSteps: 1,
|
| 276 |
+
precision: 'bf16',
|
| 277 |
+
activationCheckpointing: true,
|
| 278 |
+
optimizer: 'adamw',
|
| 279 |
+
},
|
| 280 |
+
cluster: h100Cluster(8, 4),
|
| 281 |
+
parallelism: {
|
| 282 |
+
tp: 8,
|
| 283 |
+
pp: 1,
|
| 284 |
+
cp: 1,
|
| 285 |
+
ep: 1,
|
| 286 |
+
distributedOptimizer: false,
|
| 287 |
+
fsdpShardGroupSize: 0,
|
| 288 |
+
zeroStage: 0,
|
| 289 |
+
},
|
| 290 |
+
},
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
/**
 * Copies a model configuration, including its nested `attentionProfile` and
 * `moe` objects when they are present.
 *
 * @param model Model configuration to copy.
 * @returns A new object; absent nested sections come back as `undefined`.
 */
const cloneModel = (model: ModelConfig): ModelConfig => {
  const { attentionProfile, moe } = model

  return {
    ...model,
    attentionProfile: attentionProfile ? { ...attentionProfile } : undefined,
    moe: moe ? { ...moe } : undefined,
  }
}
|
| 298 |
+
|
| 299 |
+
/**
 * Makes a shallow copy of a training configuration.
 *
 * @param training Training configuration to copy.
 * @returns A new object with the same own properties.
 */
const cloneTraining = (training: TrainingConfig): TrainingConfig => {
  const copy: TrainingConfig = { ...training }
  return copy
}
|
| 300 |
+
|
| 301 |
+
/**
 * Copies a cluster configuration together with its nested GPU spec.
 *
 * @param cluster Cluster configuration to copy.
 * @returns A new object whose `gpuType` is also a new object, so edits to
 *   the copy cannot reach the source.
 */
const cloneCluster = (cluster: ClusterConfig): ClusterConfig => ({
  ...cluster,
  // The spread is shallow; without this the clone and the source would share
  // a single `gpuType` object.
  gpuType: { ...cluster.gpuType },
})
|
| 302 |
+
|
| 303 |
+
/**
 * Makes a shallow copy of a parallelism configuration.
 *
 * @param parallelism Parallelism configuration to copy.
 * @returns A new object with the same own properties.
 */
const cloneParallelism = (parallelism: ParallelismConfig): ParallelismConfig => {
  const copy: ParallelismConfig = { ...parallelism }
  return copy
}
|
| 306 |
+
|
| 307 |
+
/**
 * Copies a workbench configuration section by section.
 *
 * The identifiers and the `customized` flag are carried over as-is; the
 * model, training, cluster and parallelism sections each go through their
 * own clone helper.
 *
 * @param config Configuration to copy.
 * @returns A new `WorkbenchConfig`.
 */
export const cloneWorkbenchConfig = (config: WorkbenchConfig): WorkbenchConfig => {
  const { examplePresetId, phaseId, customized, model, training, cluster, parallelism } = config

  return {
    examplePresetId,
    phaseId,
    customized,
    model: cloneModel(model),
    training: cloneTraining(training),
    cluster: cloneCluster(cluster),
    parallelism: cloneParallelism(parallelism),
  }
}
|
| 316 |
+
|
| 317 |
+
/**
 * Returns a copy of the stored configuration for a scenario.
 *
 * @param scenario Key into the module-level `SCENARIOS` table.
 * @returns The result of `cloneWorkbenchConfig` on that entry.
 */
export function getScenarioWorkbenchConfig(scenario: WorkbenchScenarioId) {
  const template = SCENARIOS[scenario]
  return cloneWorkbenchConfig(template)
}
|
| 320 |
+
|
| 321 |
+
/**
 * Lists the example presets as `{ id, label }` options.
 *
 * The `'llama31-405b'` preset is left out of the list.
 *
 * @returns One option per remaining preset, in `EXAMPLE_PRESETS` key order.
 */
export function getExamplePresetOptions() {
  const options: Array<{ id: ExamplePresetId; label: string }> = []

  for (const [id, preset] of Object.entries(EXAMPLE_PRESETS)) {
    if (id === 'llama31-405b') {
      continue
    }

    options.push({ id: id as ExamplePresetId, label: preset.label })
  }

  return options
}
|
| 329 |
+
|
| 330 |
+
/**
 * Lists the phases of an example preset as `{ id, label }` options.
 *
 * @param examplePresetId Key of the preset whose phases are listed.
 * @returns One option per phase; `'pretraining'` is labelled "Pretraining"
 *   and any other phase is labelled "Long-context".
 */
export function getPhaseOptions(examplePresetId: ExamplePresetId) {
  const { phases } = EXAMPLE_PRESETS[examplePresetId]
  const phaseIds = Object.keys(phases)

  return phaseIds.map((phaseId) => {
    const label: string = phaseId === 'pretraining' ? 'Pretraining' : 'Long-context'

    return { id: phaseId as ExamplePhaseId, label }
  })
}
|
| 338 |
+
|
| 339 |
+
/**
 * Looks up the display label of an example preset.
 *
 * @param examplePresetId Key into `EXAMPLE_PRESETS`.
 * @returns The preset's `label`.
 */
export function getExampleLabel(examplePresetId: ExamplePresetId) {
  const { label } = EXAMPLE_PRESETS[examplePresetId]
  return label
}
|
| 342 |
+
|
| 343 |
+
/**
 * Lists the GPU presets as `{ id, label }` options.
 *
 * @returns One option per entry of `GPU_PRESETS`, in key order.
 */
export function getGpuPresetOptions() {
  const options: Array<{ id: GpuPresetId; label: string }> = []

  for (const [id, preset] of Object.entries(GPU_PRESETS)) {
    options.push({ id: id as GpuPresetId, label: preset.label })
  }

  return options
}
|
| 349 |
+
|
| 350 |
+
/**
 * Finds which GPU preset, if any, a spec corresponds to.
 *
 * @param gpuType Spec to identify.
 * @returns The id of the first preset whose spec satisfies
 *   `gpuPresetMatches`, or `'custom'` when none does.
 */
export function getGpuPresetId(gpuType: GPUSpec): GpuPresetId | 'custom' {
  const match = Object.entries(GPU_PRESETS).find(([, preset]) =>
    gpuPresetMatches(gpuType, preset.spec()),
  )

  return match ? (match[0] as GpuPresetId) : 'custom'
}
|
| 359 |
+
|
| 360 |
+
/**
 * Returns a copy of a configuration with its GPU type swapped for a preset.
 *
 * The input is not modified. The result is flagged as customized.
 *
 * @param config Configuration to start from.
 * @param gpuPresetId Key of the GPU preset to apply.
 * @returns A new `WorkbenchConfig` with a new `cluster` object.
 */
export function applyGpuPreset(config: WorkbenchConfig, gpuPresetId: GpuPresetId): WorkbenchConfig {
  const gpuType = GPU_PRESETS[gpuPresetId].spec()
  const cluster: ClusterConfig = { ...config.cluster, gpuType }

  return { ...config, customized: true, cluster }
}
|
| 370 |
+
|
| 371 |
+
/**
 * Switches to another example preset, starting at its pretraining phase.
 *
 * @param _config Current configuration; not read.
 * @param examplePresetId Key of the preset to load.
 * @returns A fresh configuration from `createWorkbenchConfig`.
 */
export function applyExamplePreset(
  _config: WorkbenchConfig,
  examplePresetId: ExamplePresetId,
): WorkbenchConfig {
  const initialPhase: ExamplePhaseId = 'pretraining'
  return createWorkbenchConfig(examplePresetId, initialPhase)
}
|
| 377 |
+
|
| 378 |
+
/**
 * Switches the current example preset to another phase.
 *
 * Only `config.examplePresetId` is read; the result is rebuilt from the
 * preset, so other fields of `config` are not carried over.
 *
 * @param config Current configuration.
 * @param phaseId Phase to load.
 * @returns A fresh configuration from `createWorkbenchConfig`.
 */
export function applyExamplePhase(
  config: WorkbenchConfig,
  phaseId: ExamplePhaseId,
): WorkbenchConfig {
  const { examplePresetId } = config
  return createWorkbenchConfig(examplePresetId, phaseId)
}
|
| 384 |
+
|
| 385 |
+
/**
 * Lists the selectable values for a setting that must divide `total`.
 *
 * @param total Number whose positive divisors are offered.
 * @param currentValue Value currently selected; always included in the
 *   result, even when it does not divide `total`.
 * @returns The divisors of `total` plus `currentValue`, de-duplicated and
 *   sorted ascending. When `total` is not a positive integer the result is
 *   just `[currentValue]`.
 */
export function getFactorOptions(total: number, currentValue: number) {
  const factors = new Set<number>([currentValue])

  // Only positive integers have divisors to enumerate. The guard also keeps
  // a non-finite `total` from turning the scan into an endless loop.
  if (Number.isInteger(total) && total > 0) {
    // Divisors come in pairs (d, total / d), so scanning up to the square
    // root finds them all.
    for (let divisor = 1; divisor * divisor <= total; divisor += 1) {
      if (total % divisor === 0) {
        factors.add(divisor)
        factors.add(total / divisor)
      }
    }
  }

  return Array.from(factors).sort((left, right) => left - right)
}
|
src/lib/workbenchPresenter.ts
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { type ClusterAnalysis } from './trainingClusterModel'
|
| 2 |
+
import { getExampleLabel, type WorkbenchConfig } from './workbench'
|
| 3 |
+
|
| 4 |
+
export type WorkbenchViewModel = {
|
| 5 |
+
config: WorkbenchConfig
|
| 6 |
+
analysis: ClusterAnalysis
|
| 7 |
+
structuralIssue: boolean
|
| 8 |
+
warnings: string[]
|
| 9 |
+
headline: string
|
| 10 |
+
subheadline: string
|
| 11 |
+
summary: {
|
| 12 |
+
throughputLabel: string
|
| 13 |
+
throughputNote: string
|
| 14 |
+
gpuLabel: string
|
| 15 |
+
gpuNote: string
|
| 16 |
+
interconnectLabel: string
|
| 17 |
+
interconnectNote: string
|
| 18 |
+
bottleneckLabel: string
|
| 19 |
+
bottleneckNote: string
|
| 20 |
+
}
|
| 21 |
+
facts: Array<{ label: string; value: string }>
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
/** Rounds to the nearest integer and formats it with `toLocaleString()`. */
const formatInteger = (value: number) => {
  const rounded = Math.round(value)
  return rounded.toLocaleString()
}

/** Formats a fraction (e.g. 0.25) as a whole-number percentage ("25%"). */
const formatPercent = (value: number) => {
  const wholePercent = Math.round(value * 100)
  return `${wholePercent}%`
}

/** Formats a value already expressed in percent, rounded to a whole number. */
const formatPercentWhole = (value: number) => {
  const wholePercent = Math.round(value)
  return `${wholePercent}%`
}

/** Formats gigabytes: no decimals from 100 upward, one decimal below that. */
const formatGB = (value: number) => {
  const fractionDigits = value >= 100 ? 0 : 1
  return `${value.toFixed(fractionDigits)} GB`
}
|
| 28 |
+
|
| 29 |
+
/**
 * Counts the GPUs in the analysis map that report memory in use.
 *
 * @param analysis Cluster analysis whose `gpuMap` is inspected.
 * @returns Number of entries with `memoryUsedGB` greater than zero.
 */
const getAllocatedGpuCount = (analysis: ClusterAnalysis) =>
  analysis.gpuMap.reduce((count, gpu) => (gpu.memoryUsedGB > 0 ? count + 1 : count), 0)
|
| 31 |
+
|
| 32 |
+
/**
 * Counts the GPUs in the analysis map that are flagged active.
 *
 * @param analysis Cluster analysis whose `gpuMap` is inspected.
 * @returns Number of entries with a truthy `isActive`.
 */
const getActiveGpuCount = (analysis: ClusterAnalysis) =>
  analysis.gpuMap.reduce((count, gpu) => (gpu.isActive ? count + 1 : count), 0)
|
| 34 |
+
|
| 35 |
+
/**
 * Names the per-step cost with the largest time in an analysis.
 *
 * The candidates are the TP, PP, CP, FSDP and EP communication times, the
 * DP all-reduce time and the pipeline bubble time.
 *
 * @param analysis Cluster analysis to inspect.
 * @returns The label of the largest entry; on a tie the entry listed first
 *   wins.
 */
const getDominantCommLabel = (analysis: ClusterAnalysis) => {
  const { communication, throughput } = analysis
  const candidates = [
    ['TP collectives', communication.tp.timePerStepMs],
    ['PP activations', communication.pp.timePerStepMs],
    ['CP sequence exchange', communication.cp.timePerStepMs],
    ['FSDP sharding', communication.fsdp.timePerStepMs],
    ['EP routing', communication.ep.timePerStepMs],
    ['DP sync', communication.dp.allReduceTimeMs],
    ['Pipeline bubble', throughput.pipelineBubbleTimeMs],
  ] as const

  // Strict comparison keeps the earliest entry when several share the maximum.
  let dominant: (typeof candidates)[number] = candidates[0]
  for (const candidate of candidates) {
    if (candidate[1] > dominant[1]) {
      dominant = candidate
    }
  }

  return dominant[0]
}
|
| 48 |
+
|
| 49 |
+
export function buildWorkbenchViewModel(
|
| 50 |
+
config: WorkbenchConfig,
|
| 51 |
+
analysis: ClusterAnalysis,
|
| 52 |
+
): WorkbenchViewModel {
|
| 53 |
+
const requestedGpuCount =
|
| 54 |
+
config.parallelism.tp *
|
| 55 |
+
config.parallelism.pp *
|
| 56 |
+
config.parallelism.cp *
|
| 57 |
+
config.parallelism.ep *
|
| 58 |
+
analysis.derivedParallelism.dp
|
| 59 |
+
const allocatedGpuCount = getAllocatedGpuCount(analysis)
|
| 60 |
+
const activeGpuCount = getActiveGpuCount(analysis)
|
| 61 |
+
const totalGPUs = analysis.totalGPUs
|
| 62 |
+
const launchedGpuCount =
|
| 63 |
+
analysis.throughput.totalStepTimeMs > 0 ? Math.min(requestedGpuCount, totalGPUs) : 0
|
| 64 |
+
const darkGpuCount = Math.max(totalGPUs - launchedGpuCount, 0)
|
| 65 |
+
const nodesPerRack = config.cluster.nodesPerRack ?? config.cluster.numNodes
|
| 66 |
+
const rackCount = Math.ceil(config.cluster.numNodes / nodesPerRack)
|
| 67 |
+
const rackLabel = config.cluster.rackLabel ?? 'rack'
|
| 68 |
+
const nodeLabel = config.cluster.nodeLabel ?? 'node'
|
| 69 |
+
const structuralIssue = !analysis.feasible && analysis.throughput.totalStepTimeMs === 0
|
| 70 |
+
const warnings: string[] = []
|
| 71 |
+
|
| 72 |
+
if (!analysis.feasible && analysis.infeasibilityReason) {
|
| 73 |
+
warnings.push(analysis.infeasibilityReason)
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
if (structuralIssue) {
|
| 77 |
+
warnings.push('This layout is structurally invalid, so throughput and communication are not estimated.')
|
| 78 |
+
} else if (!analysis.feasible) {
|
| 79 |
+
warnings.push('The run is memory-infeasible, but the app still shows the attempted placement and estimated traffic.')
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
if (analysis.memoryBreakdown.utilizationPercent >= 92) {
|
| 83 |
+
warnings.push(
|
| 84 |
+
`Worst-case GPU HBM is ${formatPercentWhole(analysis.memoryBreakdown.utilizationPercent)} full.`,
|
| 85 |
+
)
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
if (analysis.throughput.pipelineBubbleFraction >= 0.18) {
|
| 89 |
+
warnings.push(
|
| 90 |
+
`Pipeline bubble is ${formatPercent(analysis.throughput.pipelineBubbleFraction)} of step time.`,
|
| 91 |
+
)
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
if (config.parallelism.cp > 1) {
|
| 95 |
+
warnings.push(
|
| 96 |
+
`CP shards each micro-batch into ${config.parallelism.cp} sequence slices and adds sequence exchange traffic.`,
|
| 97 |
+
)
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
if (config.parallelism.fsdpShardGroupSize > 1) {
|
| 101 |
+
warnings.push(
|
| 102 |
+
`HSDP shards weights across ${config.parallelism.fsdpShardGroupSize.toLocaleString()}-GPU groups, with ${analysis.derivedParallelism.replicaGroups} replica groups syncing once per step.`,
|
| 103 |
+
)
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
if (config.parallelism.ep > 1) {
|
| 107 |
+
warnings.push(
|
| 108 |
+
`EP routes tokens across ${config.parallelism.ep} expert lanes and adds expert all-to-all traffic.`,
|
| 109 |
+
)
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
if (!structuralIssue && darkGpuCount > 0) {
|
| 113 |
+
warnings.push(
|
| 114 |
+
`${darkGpuCount.toLocaleString()} GPUs are dark because this launch only uses ${launchedGpuCount.toLocaleString()} ranks.`,
|
| 115 |
+
)
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
const throughputLabel = structuralIssue
|
| 119 |
+
? 'n/a'
|
| 120 |
+
: formatInteger(analysis.throughput.tokensPerSecond)
|
| 121 |
+
const throughputNote = structuralIssue
|
| 122 |
+
? 'structural constraint violated'
|
| 123 |
+
: !analysis.feasible
|
| 124 |
+
? 'estimated despite HBM overflow'
|
| 125 |
+
: 'tokens / second'
|
| 126 |
+
const interconnectUtilization = Math.max(
|
| 127 |
+
analysis.communication.tp.linkUtilizationPercent,
|
| 128 |
+
analysis.communication.pp.usesInterNode
|
| 129 |
+
? analysis.communication.pp.timePerStepMs > 0
|
| 130 |
+
? analysis.communication.pp.totalVolumePerStepGB > 0
|
| 131 |
+
? Math.min(
|
| 132 |
+
100,
|
| 133 |
+
(analysis.communication.pp.totalVolumePerStepGB /
|
| 134 |
+
(config.cluster.interNodeBandwidthGBs *
|
| 135 |
+
(analysis.throughput.totalStepTimeMs / 1000 || 1))) *
|
| 136 |
+
100,
|
| 137 |
+
)
|
| 138 |
+
: 0
|
| 139 |
+
: 0
|
| 140 |
+
: 0,
|
| 141 |
+
analysis.communication.cp.linkUtilizationPercent,
|
| 142 |
+
analysis.communication.fsdp.linkUtilizationPercent,
|
| 143 |
+
analysis.communication.ep.linkUtilizationPercent,
|
| 144 |
+
analysis.communication.dp.linkUtilizationPercent,
|
| 145 |
+
)
|
| 146 |
+
const headlineGpuLabel =
|
| 147 |
+
structuralIssue || launchedGpuCount === totalGPUs
|
| 148 |
+
? `${totalGPUs.toLocaleString()} GPUs`
|
| 149 |
+
: `${launchedGpuCount.toLocaleString()} of ${totalGPUs.toLocaleString()} GPUs`
|
| 150 |
+
|
| 151 |
+
return {
|
| 152 |
+
config,
|
| 153 |
+
analysis,
|
| 154 |
+
structuralIssue,
|
| 155 |
+
warnings,
|
| 156 |
+
headline:
|
| 157 |
+
`${getExampleLabel(config.examplePresetId)}${config.customized ? ' (customized)' : ''} · ` +
|
| 158 |
+
`${config.phaseId} on ${headlineGpuLabel}`,
|
| 159 |
+
subheadline:
|
| 160 |
+
`${formatInteger(analysis.totalParams)} total params, ${formatInteger(analysis.activeParamsPerToken)} active params, ` +
|
| 161 |
+
`${config.model.numLayers} layers, ` +
|
| 162 |
+
`${rackCount} ${rackLabel}${rackCount === 1 ? '' : 's'} of ${config.cluster.gpuType.name}.`,
|
| 163 |
+
summary: {
|
| 164 |
+
throughputLabel,
|
| 165 |
+
throughputNote,
|
| 166 |
+
gpuLabel: `${activeGpuCount}/${launchedGpuCount || allocatedGpuCount || totalGPUs}`,
|
| 167 |
+
gpuNote:
|
| 168 |
+
structuralIssue
|
| 169 |
+
? 'launch invalid'
|
| 170 |
+
: launchedGpuCount === totalGPUs
|
| 171 |
+
? 'active in this placement'
|
| 172 |
+
: `${launchedGpuCount}/${totalGPUs} launched on this cluster`,
|
| 173 |
+
interconnectLabel: formatPercentWhole(interconnectUtilization),
|
| 174 |
+
interconnectNote: 'peak link utilization',
|
| 175 |
+
bottleneckLabel: analysis.feasible ? getDominantCommLabel(analysis) : 'HBM capacity',
|
| 176 |
+
bottleneckNote: analysis.feasible
|
| 177 |
+
? `${formatGB(analysis.memoryBreakdown.totalGB)} on the hottest GPU`
|
| 178 |
+
: analysis.infeasibilityReason ?? 'constraint violation',
|
| 179 |
+
},
|
| 180 |
+
facts: [
|
| 181 |
+
{
|
| 182 |
+
label: 'Model',
|
| 183 |
+
value: `${formatInteger(analysis.totalParams)} params`,
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
label: 'Context',
|
| 187 |
+
value: `${config.training.seqLength.toLocaleString()} tokens`,
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
label: 'Global batch',
|
| 191 |
+
value: `${analysis.globalBatchSizeTokens.toLocaleString()} tokens / step`,
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
label: 'Topology',
|
| 195 |
+
value: `${config.cluster.numNodes} ${nodeLabel}${config.cluster.numNodes === 1 ? '' : 's'}`,
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
label: 'Parallelism',
|
| 199 |
+
value:
|
| 200 |
+
`TP ${config.parallelism.tp} · PP ${config.parallelism.pp} · ` +
|
| 201 |
+
`CP ${config.parallelism.cp} · EP ${config.parallelism.ep} · DP ${analysis.derivedParallelism.dp}`,
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
label: 'Replica groups',
|
| 205 |
+
value: `${analysis.derivedParallelism.replicaGroups} groups`,
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
label: 'FSDP group',
|
| 209 |
+
value:
|
| 210 |
+
config.parallelism.fsdpShardGroupSize > 1
|
| 211 |
+
? `${config.parallelism.fsdpShardGroupSize.toLocaleString()} GPUs`
|
| 212 |
+
: 'disabled',
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
label: 'HBM headroom',
|
| 216 |
+
value: `${formatGB(config.cluster.gpuType.hbmCapacityGB - analysis.memoryBreakdown.totalGB)}`,
|
| 217 |
+
},
|
| 218 |
+
],
|
| 219 |
+
}
|
| 220 |
+
}
|
src/main.tsx
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { StrictMode } from 'react'
|
| 2 |
+
import { createRoot } from 'react-dom/client'
|
| 3 |
+
import '@fontsource/space-grotesk/400.css'
|
| 4 |
+
import '@fontsource/space-grotesk/500.css'
|
| 5 |
+
import '@fontsource/space-grotesk/700.css'
|
| 6 |
+
import '@fontsource/ibm-plex-mono/400.css'
|
| 7 |
+
import '@fontsource/ibm-plex-mono/500.css'
|
| 8 |
+
import './index.css'
|
| 9 |
+
import App from './App.tsx'
|
| 10 |
+
|
| 11 |
+
// Application entry point: mount <App /> under React's StrictMode.
// The host page is expected to provide an element with id "root"; the
// non-null assertion keeps the original contract (no extra runtime guard).
const rootElement = document.getElementById('root')!
const reactRoot = createRoot(rootElement)

reactRoot.render(
  <StrictMode>
    <App />
  </StrictMode>,
)
|
src/types/global.d.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export {}
|
| 2 |
+
|
| 3 |
+
// Module-local building blocks for the global debug types below. They are not
// added to the global scope; only TopologyDebugState and the Window
// augmentation are.

/** Viewport fields published by the topology surface (x, y, scale). */
type DebugViewport = {
  x: number
  y: number
  scale: number
}

/** Rectangle recorded for one rendered object. */
type DebugObjectRect = {
  x: number
  y: number
  width: number
  height: number
}

/** Reference to a hovered or pinned scene object. */
type DebugTarget = {
  kind: 'pod' | 'node' | 'gpu' | 'link'
  id: string
}

/** Named detail bands the surface may report. */
type DebugDetailLevel = 'overview' | 'board' | 'package' | 'silicon' | 'micro'

declare global {
  /**
   * Shape of the debug snapshot exposed on `window.__TOPOLOGY_DEBUG__`.
   * The Playwright spec polls `ready`, `objects`, `viewport`,
   * `hoveredTarget` and `pinnedTarget` from this object.
   */
  type TopologyDebugState = {
    ready: boolean
    viewport: DebugViewport
    surfaceSize: {
      width: number
      height: number
    }
    objectCounts: {
      pods: number
      nodes: number
      gpus: number
      links: number
      activeGpus: number
      contextualNodes: number
    }
    // Keyed by object id (the spec looks ids up by "node-" / "gpu-" prefixes).
    objects: Record<string, DebugObjectRect>
    hoveredTarget: DebugTarget | null
    pinnedTarget: DebugTarget | null
    detailLevel?: DebugDetailLevel
    setViewport?: (viewport: DebugViewport) => void
  }

  /** Debug handles attached to the browser window; all are optional. */
  interface Window {
    __PIXI_TOPOLOGY_APP__?: unknown
    __PIXI_FLOW_APP__?: unknown
    __TOPOLOGY_DEBUG__?: TopologyDebugState
  }
}
|
tests/topology.spec.ts
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { expect, test, type Page } from '@playwright/test'
|
| 2 |
+
|
| 3 |
+
/** Rectangle of a single rendered object as read from the debug state. */
type DebugObjectBounds = {
  x: number
  y: number
  width: number
  height: number
}

/** Lookup from object id to its rectangle (`window.__TOPOLOGY_DEBUG__.objects`). */
type DebugObjectMap = Record<string, DebugObjectBounds>
|
| 12 |
+
|
| 13 |
+
/**
 * Opens the app in snapshot mode and waits until it is ready to be inspected.
 *
 * Readiness means: network is idle, the debug state reports `ready` with at
 * least one entry in `objects`, and `document.fonts.ready` has resolved.
 *
 * @param page  Playwright page to drive.
 * @param query Extra query-string suffix appended after `?snapshot=1`
 *              (e.g. `&scenario=...`); defaults to none.
 */
async function gotoScenario(page: Page, query = '') {
  const url = `/?snapshot=1${query}`
  await page.goto(url)
  await page.waitForLoadState('networkidle')

  await page.waitForFunction(() => {
    const state = window.__TOPOLOGY_DEBUG__ as
      | { ready?: boolean; objects?: DebugObjectMap }
      | undefined

    if (!state?.ready || !state.objects) {
      return false
    }

    return Object.keys(state.objects).length > 0
  })

  // Wait for fonts inside the page so later screenshots see final text layout.
  await page.evaluate(async () => {
    await document.fonts.ready
  })
}
|
| 27 |
+
|
| 28 |
+
/**
 * Reads the rectangle recorded for `id` in `window.__TOPOLOGY_DEBUG__.objects`.
 *
 * @returns The stored rectangle, or `undefined` at runtime when the id is not
 *          present in the map.
 */
async function getDebugObject(page: Page, id: string) {
  const bounds = await page.evaluate((objectId) => {
    const { objects } = window.__TOPOLOGY_DEBUG__ as { objects: DebugObjectMap }
    return objects[objectId]
  }, id)

  return bounds
}
|
| 34 |
+
|
| 35 |
+
/**
 * Finds the first debug-object id that starts with `prefix`, in the key order
 * of `window.__TOPOLOGY_DEBUG__.objects`.
 *
 * @returns The matching id, or `null` when no id has that prefix.
 */
async function getFirstObjectId(page: Page, prefix: string) {
  const firstMatch = await page.evaluate((idPrefix) => {
    const { objects } = window.__TOPOLOGY_DEBUG__ as { objects: DebugObjectMap }

    for (const objectId of Object.keys(objects)) {
      if (objectId.startsWith(idPrefix)) {
        return objectId
      }
    }

    return null
  }, prefix)

  return firstMatch
}
|
| 41 |
+
|
| 42 |
+
/**
 * Scrolls the topology interaction layer into view and returns its bounding
 * box in page coordinates.
 *
 * @throws Error when the layer has no bounding box.
 */
async function getSurfaceOffset(page: Page) {
  const interactionLayer = page.getByTestId('topology-interaction-layer')
  await interactionLayer.scrollIntoViewIfNeeded()

  const bounds = await interactionLayer.boundingBox()
  if (bounds === null) {
    throw new Error('missing topology interaction layer')
  }

  return bounds
}
|
| 52 |
+
|
| 53 |
+
/**
 * Computes the page-space center point of a debug object, suitable for
 * `page.mouse` calls.
 *
 * The object's rectangle is read from the debug state and offset by the
 * interaction layer's bounding box.
 *
 * @param page Playwright page to query.
 * @param id   Key of the object in `window.__TOPOLOGY_DEBUG__.objects`.
 * @returns `{ x, y }` of the object's center in page coordinates.
 * @throws Error when no object with `id` is registered, or when the
 *         interaction layer has no bounding box.
 */
async function objectCenter(page: Page, id: string) {
  const object = await getDebugObject(page, id)
  // An unknown id comes back as undefined; fail with a message that names the
  // id instead of an opaque "cannot read properties of undefined".
  if (!object) {
    throw new Error(`missing debug object "${id}"`)
  }

  const surface = await getSurfaceOffset(page)

  return {
    x: surface.x + object.x + object.width / 2,
    y: surface.y + object.y + object.height / 2,
  }
}
|
| 62 |
+
|
| 63 |
+
// Plain scene screenshots: one test per entry, registered in this order.
// `query` is the suffix handed to gotoScenario ('' loads the default scenario).
const scenarioScreenshots = [
  {
    title: 'default scenario screenshot',
    query: '',
    snapshot: 'topology-default.png',
  },
  {
    title: 'olmo pretraining screenshot',
    query: '&scenario=olmo-pretraining',
    snapshot: 'topology-olmo-pretraining.png',
  },
  {
    title: 'llama pretraining screenshot',
    query: '&scenario=llama-pretraining',
    snapshot: 'topology-llama-pretraining.png',
  },
  {
    title: 'trinity pretraining screenshot',
    query: '&scenario=trinity-pretraining',
    snapshot: 'topology-trinity-pretraining.png',
  },
  {
    title: 'olmo long-context screenshot',
    query: '&scenario=olmo-long-context',
    snapshot: 'topology-olmo-long-context.png',
  },
  {
    title: 'trinity long-context screenshot',
    query: '&scenario=trinity-long-context',
    snapshot: 'topology-trinity-long-context.png',
  },
]

for (const { title, query, snapshot } of scenarioScreenshots) {
  test(title, async ({ page }) => {
    await gotoScenario(page, query)
    await expect(page.getByTestId('topology-scene')).toHaveScreenshot(snapshot)
  })
}
|
| 92 |
+
|
| 93 |
+
// The infeasible scenario must show its banner before the scene is captured.
test('infeasible memory screenshot', async ({ page }) => {
  await gotoScenario(page, '&scenario=infeasible-memory')

  const banner = page.getByTestId('infeasible-banner')
  await expect(banner).toBeVisible()

  const scene = page.getByTestId('topology-scene')
  await expect(scene).toHaveScreenshot('topology-infeasible-memory.png')
})
|
| 98 |
+
|
| 99 |
+
// Hovering a node must register it as the hovered target, surface host
// details in the inspector, and match the stored screenshot.
test('hover highlight screenshot', async ({ page }) => {
  await gotoScenario(page, '&scenario=olmo-pretraining')

  const nodeId = await getFirstObjectId(page, 'node-')
  if (nodeId === null) {
    throw new Error('missing visible node object')
  }

  const bounds = await getDebugObject(page, nodeId)
  const surface = await getSurfaceOffset(page)

  // Aim 6px inside the node's top-left corner (not its center).
  const hoverX = surface.x + bounds.x + 6
  const hoverY = surface.y + bounds.y + 6
  await page.mouse.move(hoverX, hoverY)

  await page.waitForFunction((expectedId) => {
    const state = window.__TOPOLOGY_DEBUG__ as { hoveredTarget?: { id: string } | null }
    return state.hoveredTarget?.id === expectedId
  }, nodeId)

  const inspector = page.getByTestId('topology-inspector')
  await expect(inspector).toContainText(/host/i)

  const scene = page.getByTestId('topology-scene')
  await expect(scene).toHaveScreenshot('topology-hover-node.png')
})
|
| 120 |
+
|
| 121 |
+
// Clicking a GPU pins it; the inspector then shows GPU details and the scene
// must match the stored screenshot.
test('pinned inspector screenshot', async ({ page }) => {
  await gotoScenario(page, '&scenario=olmo-pretraining')

  const gpuId = await getFirstObjectId(page, 'gpu-')
  if (gpuId === null) {
    throw new Error('missing visible gpu object')
  }

  const { x: clickX, y: clickY } = await objectCenter(page, gpuId)
  await page.mouse.click(clickX, clickY)

  await page.waitForFunction((expectedId) => {
    const state = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
    return state.pinnedTarget?.id === expectedId
  }, gpuId)

  const inspector = page.getByTestId('topology-inspector')
  await expect(inspector).toContainText('GPU')

  const scene = page.getByTestId('topology-scene')
  await expect(scene).toHaveScreenshot('topology-pinned-gpu.png')
})
|
| 137 |
+
|
| 138 |
+
// With `debug=1` the debug overlay must be visible; the screenshot is taken
// of the interaction layer rather than the whole scene.
test('debug overlay screenshot', async ({ page }) => {
  await gotoScenario(page, '&debug=1')

  const overlay = page.getByTestId('topology-debug')
  await expect(overlay).toBeVisible()

  const interactionLayer = page.getByTestId('topology-interaction-layer')
  await expect(interactionLayer).toHaveScreenshot('topology-debug-overlay.png')
})
|
| 145 |
+
|
| 146 |
+
// Camera interaction flow: wheel-zoom in (without scrolling the page),
// drag to pan, then reset back to the initial scale.
test('supports zoom pan and reset camera', async ({ page }) => {
  // Snapshot of the viewport currently published on the debug state.
  const readViewport = () =>
    page.evaluate(() => {
      const state = window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number; x: number } }
      return state.viewport
    })
  const readScrollY = () => page.evaluate(() => window.scrollY)

  await gotoScenario(page)
  const interactionLayer = page.getByTestId('topology-interaction-layer')
  await interactionLayer.scrollIntoViewIfNeeded()

  const initialViewport = await readViewport()
  const initialScrollY = await readScrollY()

  const layerBox = await interactionLayer.boundingBox()
  if (layerBox === null) {
    throw new Error('missing interaction layer bounds')
  }
  const centerX = layerBox.x + layerBox.width / 2
  const centerY = layerBox.y + layerBox.height / 2

  // Zoom: wheel up over the center of the layer.
  await page.mouse.move(centerX, centerY)
  await page.mouse.wheel(0, -320)
  await page.waitForFunction((previousScale) => {
    const state = window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number } }
    return state.viewport.scale > previousScale
  }, initialViewport.scale)

  const zoomedViewport = await readViewport()
  expect(zoomedViewport.scale).toBeGreaterThan(initialViewport.scale)
  // The wheel event must be consumed by the surface, not scroll the page.
  expect(await readScrollY()).toBe(initialScrollY)

  // Pan: drag from the center by (80, 60) in six steps.
  await page.mouse.down()
  await page.mouse.move(centerX + 80, centerY + 60, {
    steps: 6,
  })
  await page.mouse.up()
  await page.waitForFunction((previousX) => {
    const state = window.__TOPOLOGY_DEBUG__ as { viewport: { x: number } }
    return state.viewport.x !== previousX
  }, zoomedViewport.x)

  const pannedViewport = await readViewport()
  expect(pannedViewport.x).not.toBe(zoomedViewport.x)

  // Reset: the scale must return to (within 0.01 of) its initial value.
  await page.getByTestId('camera-reset').click()
  await page.waitForFunction((targetScale) => {
    const state = window.__TOPOLOGY_DEBUG__ as { viewport: { scale: number } }
    return Math.abs(state.viewport.scale - targetScale) < 0.01
  }, initialViewport.scale)

  const resetViewport = await readViewport()
  const scaleDrift = Math.abs(resetViewport.scale - initialViewport.scale)
  expect(scaleDrift).toBeLessThan(0.01)
})
|
| 199 |
+
|
| 200 |
+
// First click on a GPU pins it; a second click on the same spot unpins it,
// after which the inspector falls back to the hover view of that GPU.
test('supports pin and unpin via click', async ({ page }) => {
  await gotoScenario(page, '&scenario=olmo-pretraining')

  const gpuId = await getFirstObjectId(page, 'gpu-')
  if (gpuId === null) {
    throw new Error('missing visible gpu object')
  }

  const { x: gpuX, y: gpuY } = await objectCenter(page, gpuId)
  const inspector = page.getByTestId('topology-inspector')

  // Pin.
  await page.mouse.click(gpuX, gpuY)
  await page.waitForFunction((expectedId) => {
    const state = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
    return state.pinnedTarget?.id === expectedId
  }, gpuId)
  await expect(inspector).toContainText('GPU')

  // Unpin.
  await page.mouse.click(gpuX, gpuY)
  await page.waitForFunction(() => {
    const state = window.__TOPOLOGY_DEBUG__ as { pinnedTarget?: { id: string } | null }
    return state.pinnedTarget === null || state.pinnedTarget === undefined
  })
  await expect(inspector).toContainText('Hover target')
  await expect(inspector).toContainText('GPU')
})
|
| 224 |
+
|
| 225 |
+
// Editing a model field marks the preset as customized and is reflected in
// the page text; editing the node count updates the cluster GPU total.
test('supports manual model and cluster edits beyond the example presets', async ({ page }) => {
  await gotoScenario(page, '&scenario=olmo-pretraining')

  const hiddenDimInput = page.getByLabel('Hidden dim')
  await hiddenDimInput.fill('6144')

  const customizedBadge = page.locator('.control-badge', { hasText: 'customized' })
  await expect(customizedBadge).toBeVisible()
  await expect(page.getByText(/hidden 6,144/i)).toBeVisible()

  const nodesInput = page.getByRole('spinbutton', { name: 'Nodes', exact: true })
  await nodesInput.fill('64')

  await expect(page.getByText('512 GPUs in cluster')).toBeVisible()
})
|
tests/topologyLod.test.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, expect, it } from 'vitest'
|
| 2 |
+
|
| 3 |
+
import { getTopologyLodState } from '../src/lib/topologyLod'
|
| 4 |
+
|
| 5 |
+
// Unit tests for getTopologyLodState: which detail band dominates at a given
// zoom scale and how band weights blend.
describe('topology lod policy', () => {
  it('keeps overview strongest at cluster-scale zoom', () => {
    const { primaryBand, weights } = getTopologyLodState(0.05)

    expect(primaryBand).toBe('overview')
    expect(weights.overview).toBe(1)
    expect(weights.board).toBe(0)
  })

  it('cross-fades only between adjacent detail bands', () => {
    // Scale 2.8: board and package overlap, silicon is not yet involved.
    const { weights: boardToPackage } = getTopologyLodState(2.8)
    expect(boardToPackage.board).toBeGreaterThan(0)
    expect(boardToPackage.package).toBeGreaterThan(0)
    expect(boardToPackage.silicon).toBe(0)

    // Scale 6.5: package carries weight, board has fully faded out.
    const { weights: packageWeights } = getTopologyLodState(6.5)
    expect(packageWeights.package).toBeGreaterThan(0.4)
    expect(packageWeights.board).toBe(0)

    // Scale 40: silicon is primary, package has fully faded out.
    const siliconLod = getTopologyLodState(40)
    expect(siliconLod.primaryBand).toBe('silicon')
    expect(siliconLod.weights.package).toBe(0)
  })

  it('activates deep isolation only at extreme gpu zoom', () => {
    const shallow = getTopologyLodState(4)
    expect(shallow.deepIsolation).toBeLessThan(0.1)

    const deep = getTopologyLodState(140)
    expect(deep.deepIsolation).toBeGreaterThan(0.8)
    expect(deep.weights.micro).toBeGreaterThan(0.5)
  })

  it('keeps lod weights normalized to a single active blend', () => {
    const scales = [0.05, 0.2, 1.1, 3, 8, 24, 110]

    for (const scale of scales) {
      const { weights } = getTopologyLodState(scale)

      // Sum the weights and count bands that contribute noticeably.
      let total = 0
      let activeBands = 0
      for (const weight of Object.values(weights)) {
        total += weight
        if (weight > 0.001) {
          activeBands += 1
        }
      }

      expect(total).toBeCloseTo(1, 4)
      expect(activeBands).toBeLessThanOrEqual(2)
    }
  })
})
|
tests/topologySceneModel.test.ts
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, expect, it } from 'vitest'
|
| 2 |
+
|
| 3 |
+
import { buildTopologySceneModel, describeTarget } from '../src/lib/topologyScene'
|
| 4 |
+
import { analyzeCluster } from '../src/lib/trainingClusterModel'
|
| 5 |
+
import { buildWorkbenchViewModel } from '../src/lib/workbenchPresenter'
|
| 6 |
+
import { getScenarioWorkbenchConfig } from '../src/lib/workbench'
|
| 7 |
+
|
| 8 |
+
/**
 * Runs the full pipeline used by every test below for one scenario preset:
 * config -> cluster analysis -> workbench view model -> topology scene.
 */
function buildScenarioScene(scenarioId: Parameters<typeof getScenarioWorkbenchConfig>[0]) {
  const config = getScenarioWorkbenchConfig(scenarioId)
  const analysis = analyzeCluster(config.model, config.training, config.cluster, config.parallelism)
  const viewModel = buildWorkbenchViewModel(config, analysis)
  const scene = buildTopologySceneModel(viewModel)

  return { config, analysis, viewModel, scene }
}

// Unit tests for buildTopologySceneModel / describeTarget across presets.
describe('topology scene model', () => {
  it('groups nodes into racks using cluster metadata', () => {
    const { config, scene } = buildScenarioScene('trinity-pretraining')
    const expectedGpuCount = config.cluster.numNodes * config.cluster.gpusPerNode

    expect(scene.pods.length).toBeGreaterThan(1)
    expect(scene.nodes).toHaveLength(config.cluster.numNodes)
    expect(scene.objectCounts.gpus).toBe(expectedGpuCount)
    expect(scene.lodPolicy.maxScale).toBeGreaterThan(100)
  })

  it('describes GPUs with analysis-backed shard indices, including EP and FSDP', () => {
    const { scene, viewModel } = buildScenarioScene('trinity-pretraining')

    // Pick the first GPU that actually holds memory.
    const allGpus = scene.nodes.flatMap((node) => node.gpus)
    const gpu = allGpus.find((item) => item.memoryUsedGB > 0)
    if (!gpu) {
      throw new Error('expected at least one allocated gpu')
    }

    const details = describeTarget(scene, viewModel, { kind: 'gpu', id: gpu.id })
    const hasMetric = (label: string) => details?.metrics.some((metric) => metric.label === label)

    expect(hasMetric('Expert lane')).toBe(true)
    expect(hasMetric('FSDP rank')).toBe(true)
  })

  it('keeps the scene renderable for infeasible configurations', () => {
    const { analysis, viewModel, scene } = buildScenarioScene('infeasible-memory')

    expect(analysis.feasible).toBe(false)
    expect(scene.nodes.length).toBeGreaterThan(0)
    expect(viewModel.warnings[0]).toContain('exceeding')
  })

  it('exposes EP traffic links in the Trinity preset', () => {
    const { scene } = buildScenarioScene('trinity-pretraining')
    const allLinks = [...scene.rowLinks, ...scene.columnLinks, ...scene.busLinks]

    expect(allLinks.some((link) => link.trafficType === 'ep')).toBe(true)
  })

  it('exposes CP traffic links in the OLMo long-context preset', () => {
    const { scene } = buildScenarioScene('olmo-long-context')
    const allLinks = [...scene.rowLinks, ...scene.columnLinks, ...scene.busLinks]

    expect(allLinks.some((link) => link.trafficType === 'cp')).toBe(true)
  })

  it('collapses cross-rack links to rack centers instead of drawing node-to-node lines across racks', () => {
    const { scene } = buildScenarioScene('llama-pretraining')

    const gridLinks = [...scene.rowLinks, ...scene.columnLinks]
    const rackLink = gridLinks.find(
      (link) => link.scope === 'rack' && link.transport === 'infiniband',
    )
    expect(rackLink).toBeDefined()

    // Both endpoints of the rack link must coincide with some pod's center.
    const startsAtPodCenter = scene.pods.some(
      (pod) => pod.centerX === rackLink?.x1 && pod.centerY === rackLink?.y1,
    )
    const endsAtPodCenter = scene.pods.some(
      (pod) => pod.centerX === rackLink?.x2 && pod.centerY === rackLink?.y2,
    )
    expect(startsAtPodCenter).toBe(true)
    expect(endsAtPodCenter).toBe(true)
  })

  it('keeps stable focus and lod frames for every gpu', () => {
    const { scene } = buildScenarioScene('llama-pretraining')
    const gpus = scene.nodes.flatMap((node) => node.gpus)

    expect(gpus.length).toBe(scene.objectCounts.gpus)

    // Focus frames may be larger than the GPU; lod frames must match exactly.
    const widthsConsistent = gpus.every(
      (gpu) => gpu.focusFrame.width >= gpu.width && gpu.lodFrame.width === gpu.width,
    )
    const heightsConsistent = gpus.every(
      (gpu) => gpu.focusFrame.height >= gpu.height && gpu.lodFrame.height === gpu.height,
    )
    expect(widthsConsistent).toBe(true)
    expect(heightsConsistent).toBe(true)
  })
})
|
tests/trainingClusterModel.test.ts
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, expect, it } from 'vitest'
|
| 2 |
+
|
| 3 |
+
import {
|
| 4 |
+
a100_80gb,
|
| 5 |
+
analyzeCluster,
|
| 6 |
+
b300,
|
| 7 |
+
cluster64GPU,
|
| 8 |
+
h100_sxm,
|
| 9 |
+
llama70B,
|
| 10 |
+
llama7B,
|
| 11 |
+
llama31_405B,
|
| 12 |
+
olmo3_32B,
|
| 13 |
+
singleNode8GPU,
|
| 14 |
+
trinityLarge400B,
|
| 15 |
+
type ClusterConfig,
|
| 16 |
+
type TrainingConfig,
|
| 17 |
+
} from '../src/lib/trainingClusterModel'
|
| 18 |
+
|
| 19 |
+
// Shared training settings for the tests in this file. Individual cases
// either pass it as-is or spread it and override fields such as `seqLength`
// and `gradAccumSteps`.
const baselineTraining: TrainingConfig = {
  microBatchSize: 1,
  seqLength: 2048,
  gradAccumSteps: 8,
  precision: 'bf16',
  activationCheckpointing: true,
  optimizer: 'adamw',
}
|
| 27 |
+
|
| 28 |
+
describe('trainingClusterModel', () => {
|
| 29 |
+
it('fits Llama 2 7B on 8x A100 80GB with TP=8 and derived DP=1', () => {
|
| 30 |
+
const analysis = analyzeCluster(llama7B(), baselineTraining, singleNode8GPU(a100_80gb()), {
|
| 31 |
+
tp: 8,
|
| 32 |
+
pp: 1,
|
| 33 |
+
cp: 1,
|
| 34 |
+
ep: 1,
|
| 35 |
+
distributedOptimizer: false,
|
| 36 |
+
fsdpShardGroupSize: 0,
|
| 37 |
+
zeroStage: 0,
|
| 38 |
+
})
|
| 39 |
+
|
| 40 |
+
expect(analysis.feasible).toBe(true)
|
| 41 |
+
expect(analysis.derivedParallelism.dp).toBe(1)
|
| 42 |
+
expect(analysis.memoryBreakdown.totalGB).toBeLessThan(80)
|
| 43 |
+
})
|
| 44 |
+
|
| 45 |
+
it('marks Llama 2 70B on 8x A100 80GB as infeasible for unsharded Adam training', () => {
|
| 46 |
+
const analysis = analyzeCluster(llama70B(), baselineTraining, singleNode8GPU(a100_80gb()), {
|
| 47 |
+
tp: 8,
|
| 48 |
+
pp: 1,
|
| 49 |
+
cp: 1,
|
| 50 |
+
ep: 1,
|
| 51 |
+
distributedOptimizer: false,
|
| 52 |
+
fsdpShardGroupSize: 0,
|
| 53 |
+
zeroStage: 0,
|
| 54 |
+
})
|
| 55 |
+
|
| 56 |
+
expect(analysis.feasible).toBe(false)
|
| 57 |
+
expect(analysis.infeasibilityReason).toContain('exceeding 80 GB of HBM')
|
| 58 |
+
})
|
| 59 |
+
|
| 60 |
+
it('keeps MFU in a realistic range for a balanced 64x H100 dense run', () => {
|
| 61 |
+
const analysis = analyzeCluster(
|
| 62 |
+
llama70B(),
|
| 63 |
+
{
|
| 64 |
+
...baselineTraining,
|
| 65 |
+
seqLength: 4096,
|
| 66 |
+
gradAccumSteps: 16,
|
| 67 |
+
},
|
| 68 |
+
cluster64GPU(h100_sxm()),
|
| 69 |
+
{
|
| 70 |
+
tp: 4,
|
| 71 |
+
pp: 4,
|
| 72 |
+
cp: 1,
|
| 73 |
+
ep: 1,
|
| 74 |
+
distributedOptimizer: true,
|
| 75 |
+
fsdpShardGroupSize: 0,
|
| 76 |
+
zeroStage: 1,
|
| 77 |
+
},
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
expect(analysis.feasible).toBe(true)
|
| 81 |
+
expect(analysis.derivedParallelism.dp).toBe(4)
|
| 82 |
+
expect(analysis.throughput.mfu).toBeGreaterThan(0.3)
|
| 83 |
+
expect(analysis.throughput.mfu).toBeLessThanOrEqual(0.62)
|
| 84 |
+
})
|
| 85 |
+
|
| 86 |
+
it('reduces activation memory when CP increases and adds CP communication', () => {
|
| 87 |
+
const withoutCp = analyzeCluster(
|
| 88 |
+
llama70B(),
|
| 89 |
+
{
|
| 90 |
+
...baselineTraining,
|
| 91 |
+
seqLength: 4096,
|
| 92 |
+
},
|
| 93 |
+
cluster64GPU(h100_sxm()),
|
| 94 |
+
{
|
| 95 |
+
tp: 2,
|
| 96 |
+
pp: 2,
|
| 97 |
+
cp: 1,
|
| 98 |
+
ep: 1,
|
| 99 |
+
distributedOptimizer: true,
|
| 100 |
+
fsdpShardGroupSize: 0,
|
| 101 |
+
zeroStage: 1,
|
| 102 |
+
},
|
| 103 |
+
)
|
| 104 |
+
const withCp = analyzeCluster(
|
| 105 |
+
llama70B(),
|
| 106 |
+
{
|
| 107 |
+
...baselineTraining,
|
| 108 |
+
seqLength: 4096,
|
| 109 |
+
},
|
| 110 |
+
cluster64GPU(h100_sxm()),
|
| 111 |
+
{
|
| 112 |
+
tp: 2,
|
| 113 |
+
pp: 2,
|
| 114 |
+
cp: 4,
|
| 115 |
+
ep: 1,
|
| 116 |
+
distributedOptimizer: true,
|
| 117 |
+
fsdpShardGroupSize: 0,
|
| 118 |
+
zeroStage: 1,
|
| 119 |
+
},
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
expect(withCp.memoryBreakdown.activationsGB).toBeLessThan(withoutCp.memoryBreakdown.activationsGB)
|
| 123 |
+
expect(withCp.communication.cp.totalVolumePerStepGB).toBeGreaterThan(0)
|
| 124 |
+
})
|
| 125 |
+
|
| 126 |
+
// Runs OLMo 3 32B twice on the same 128-node cluster: once with no sharding
// (zeroStage 0, no distributed optimizer) and once with zeroStage 3 and a
// 256-wide FSDP shard group. The sharded run must resolve to 4 replica
// groups, use less total memory, and report FSDP traffic.
it('reduces OLMo memory with HSDP shard groups compared with plain DP', () => {
  // Start from the 64-GPU fixture and override only the node and rack counts.
  const cluster = {
    ...cluster64GPU(h100_sxm()),
    numNodes: 128,
    nodesPerRack: 16,
  }

  // Everything except the parallelism layout is shared between the two runs.
  const analyzeOlmo = (parallelism: Parameters<typeof analyzeCluster>[3]) =>
    analyzeCluster(
      olmo3_32B(),
      {
        microBatchSize: 1,
        seqLength: 8192,
        gradAccumSteps: 1,
        precision: 'bf16',
        activationCheckpointing: true,
        optimizer: 'adamw',
      },
      cluster,
      parallelism,
    )

  const plain = analyzeOlmo({
    tp: 1,
    pp: 1,
    cp: 1,
    ep: 1,
    distributedOptimizer: false,
    fsdpShardGroupSize: 0,
    zeroStage: 0,
  })
  const hsdp = analyzeOlmo({
    tp: 1,
    pp: 1,
    cp: 1,
    ep: 1,
    distributedOptimizer: true,
    fsdpShardGroupSize: 256,
    zeroStage: 3,
  })

  expect(hsdp.derivedParallelism.replicaGroups).toBe(4)
  expect(hsdp.memoryBreakdown.totalGB).toBeLessThan(plain.memoryBreakdown.totalGB)
  expect(hsdp.communication.fsdp.totalVolumePerStepGB).toBeGreaterThan(0)
})
|
| 179 |
+
|
| 180 |
+
// Trinity Large on the trinityCluster() fixture with EP=8: total parameters
// must exceed 300e9 while active parameters per token equal exactly 13e9,
// expert-parallel traffic must be non-zero without crossing nodes, and the
// GPU map must use exactly EP lanes 0 through 7.
it('models Trinity as total-parameter-heavy but active-compute-light', () => {
  const { totalParams, activeParamsPerToken, communication, gpuMap } = analyzeCluster(
    trinityLarge400B(),
    {
      microBatchSize: 1,
      seqLength: 8192,
      gradAccumSteps: 8,
      precision: 'bf16',
      activationCheckpointing: true,
      optimizer: 'muon',
    },
    trinityCluster(),
    {
      tp: 1,
      pp: 1,
      cp: 1,
      ep: 8,
      distributedOptimizer: true,
      fsdpShardGroupSize: 128,
      zeroStage: 3,
    },
  )

  expect(totalParams).toBeGreaterThan(300_000_000_000)
  expect(activeParamsPerToken).toBe(13_000_000_000)
  expect(communication.ep.totalVolumePerStepGB).toBeGreaterThan(0)
  expect(communication.ep.usesInterNode).toBe(false)

  // Set equality: every lane 0..7 appears at least once and no other lane does.
  const observedEpLanes = new Set(gpuMap.map(({ epLane }) => epLane))
  const expectedEpLanes = new Set(Array.from({ length: 8 }, (_, lane) => lane))
  expect(observedEpLanes).toEqual(expectedEpLanes)
})
|
| 211 |
+
|
| 212 |
+
// Llama 3.1 405B on the llama405Cluster() fixture with TP=8 and PP=16: the
// derived data-parallel degree must be 128, the plan must be feasible, TP and
// PP must both report traffic, and FSDP traffic must be exactly zero because
// no shard group is configured.
it('derives DP for Llama 3.1 405B from world size and 4D parallelism', () => {
  const { derivedParallelism, feasible, communication } = analyzeCluster(
    llama31_405B(),
    {
      microBatchSize: 1,
      seqLength: 8192,
      gradAccumSteps: 16,
      precision: 'bf16',
      activationCheckpointing: true,
      optimizer: 'adamw',
    },
    llama405Cluster(),
    {
      tp: 8,
      pp: 16,
      cp: 1,
      ep: 1,
      distributedOptimizer: true,
      fsdpShardGroupSize: 0,
      zeroStage: 1,
    },
  )

  expect(derivedParallelism.dp).toBe(128)
  expect(feasible).toBe(true)

  const { tp, pp, fsdp } = communication
  expect(tp.totalVolumePerStepGB).toBeGreaterThan(0)
  expect(pp.totalVolumePerStepGB).toBeGreaterThan(0)
  expect(fsdp.totalVolumePerStepGB).toBe(0)
})
|
| 241 |
+
})
|
| 242 |
+
|
| 243 |
+
/**
 * Cluster fixture for the Llama 3.1 405B test: 2048 nodes of 8 H100 SXM GPUs
 * (16,384 GPUs in total), grouped 16 nodes to a rack.
 *
 * @returns a freshly built ClusterConfig on every call.
 */
function llama405Cluster(): ClusterConfig {
  const fixture: ClusterConfig = {
    gpuType: h100_sxm(),
    gpusPerNode: 8,
    numNodes: 2048,
    intraNodeBandwidthGBs: 900,
    interNodeBandwidthGBs: 50,
    nodesPerRack: 16,
    // Display labels; the pod label deliberately repeats the rack label here.
    rackLabel: 'rack',
    nodeLabel: 'GPU host',
    podLabel: 'rack',
  }

  return fixture
}
|
| 256 |
+
|
| 257 |
+
/**
 * Cluster fixture for the Trinity test: 256 nodes of 8 B300 GPUs (2048 GPUs
 * in total), grouped 9 nodes (72 GPUs) to a rack.
 *
 * @returns a freshly built ClusterConfig on every call.
 */
function trinityCluster(): ClusterConfig {
  const fixture: ClusterConfig = {
    gpuType: b300(),
    gpusPerNode: 8,
    numNodes: 256,
    intraNodeBandwidthGBs: 900,
    interNodeBandwidthGBs: 50,
    nodesPerRack: 9,
    // Display labels; the pod label deliberately repeats the rack label here.
    rackLabel: 'rack',
    nodeLabel: 'GPU host',
    podLabel: 'rack',
  }

  return fixture
}
|
tsconfig.app.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
|
| 4 |
+
"target": "ES2022",
|
| 5 |
+
"useDefineForClassFields": true,
|
| 6 |
+
"lib": ["ES2022", "DOM", "DOM.Iterable"],
|
| 7 |
+
"module": "ESNext",
|
| 8 |
+
"types": ["vite/client"],
|
| 9 |
+
"skipLibCheck": true,
|
| 10 |
+
|
| 11 |
+
/* Bundler mode */
|
| 12 |
+
"moduleResolution": "bundler",
|
| 13 |
+
"allowImportingTsExtensions": true,
|
| 14 |
+
"verbatimModuleSyntax": true,
|
| 15 |
+
"moduleDetection": "force",
|
| 16 |
+
"noEmit": true,
|
| 17 |
+
"jsx": "react-jsx",
|
| 18 |
+
|
| 19 |
+
/* Linting */
|
| 20 |
+
"strict": true,
|
| 21 |
+
"noUnusedLocals": true,
|
| 22 |
+
"noUnusedParameters": true,
|
| 23 |
+
"erasableSyntaxOnly": true,
|
| 24 |
+
"noFallthroughCasesInSwitch": true,
|
| 25 |
+
"noUncheckedSideEffectImports": true
|
| 26 |
+
},
|
| 27 |
+
"include": ["src"]
|
| 28 |
+
}
|
tsconfig.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [],
|
| 3 |
+
"references": [
|
| 4 |
+
{ "path": "./tsconfig.app.json" },
|
| 5 |
+
{ "path": "./tsconfig.node.json" }
|
| 6 |
+
]
|
| 7 |
+
}
|
tsconfig.node.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
|
| 4 |
+
"target": "ES2023",
|
| 5 |
+
"lib": ["ES2023"],
|
| 6 |
+
"module": "ESNext",
|
| 7 |
+
"types": ["node"],
|
| 8 |
+
"skipLibCheck": true,
|
| 9 |
+
|
| 10 |
+
/* Bundler mode */
|
| 11 |
+
"moduleResolution": "bundler",
|
| 12 |
+
"allowImportingTsExtensions": true,
|
| 13 |
+
"verbatimModuleSyntax": true,
|
| 14 |
+
"moduleDetection": "force",
|
| 15 |
+
"noEmit": true,
|
| 16 |
+
|
| 17 |
+
/* Linting */
|
| 18 |
+
"strict": true,
|
| 19 |
+
"noUnusedLocals": true,
|
| 20 |
+
"noUnusedParameters": true,
|
| 21 |
+
"erasableSyntaxOnly": true,
|
| 22 |
+
"noFallthroughCasesInSwitch": true,
|
| 23 |
+
"noUncheckedSideEffectImports": true
|
| 24 |
+
},
|
| 25 |
+
"include": ["vite.config.ts"]
|
| 26 |
+
}
|
vite.config.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { defineConfig } from 'vite'
|
| 2 |
+
import react from '@vitejs/plugin-react'
|
| 3 |
+
|
| 4 |
+
// Network binding shared by the dev server and the preview server: listen on
// every interface at port 7860.
// NOTE(review): presumably this matches the port the Space's container
// exposes — confirm against the Dockerfile.
// NOTE(review): recent Vite releases reject requests whose Host header is not
// allow-listed; confirm whether `allowedHosts` is needed when this is served
// from a public hostname.
const listenOn = { host: '0.0.0.0', port: 7860 }

export default defineConfig({
  plugins: [react()],
  // Spread into separate objects so the two sections never share a reference.
  server: { ...listenOn },
  preview: { ...listenOn },
})
|
vitest.config.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { defineConfig } from 'vitest/config'
|
| 2 |
+
|
| 3 |
+
// Only `*.test.ts` files under tests/ are collected by Vitest, so files using
// another suffix (for example `*.spec.ts`) are left out.
// NOTE(review): presumably the `.spec.ts` files are run by Playwright instead — confirm.
const unitTestGlobs = ['tests/**/*.test.ts']

export default defineConfig({
  test: {
    include: unitTestGlobs,
  },
})
|