// illustrated-cluster/tests/topologySceneModel.test.ts
import { describe, expect, it } from 'vitest'
import { buildTopologySceneModel, describeTarget } from '../src/lib/topologyScene'
import { analyzeCluster } from '../src/lib/trainingClusterModel'
import { buildWorkbenchViewModel } from '../src/lib/workbenchPresenter'
import { getScenarioWorkbenchConfig } from '../src/lib/workbench'
describe('topology scene model', () => {
  /**
   * Runs the full pipeline for a scenario preset: workbench config ->
   * cluster analysis -> view model -> topology scene.
   *
   * Extracted because every test below repeated this identical four-step
   * setup. Typed via Parameters<...> so the scenario-id union accepted by
   * getScenarioWorkbenchConfig is preserved (no widening to string).
   */
  const buildScenario = (scenario: Parameters<typeof getScenarioWorkbenchConfig>[0]) => {
    const config = getScenarioWorkbenchConfig(scenario)
    const analysis = analyzeCluster(config.model, config.training, config.cluster, config.parallelism)
    const viewModel = buildWorkbenchViewModel(config, analysis)
    const scene = buildTopologySceneModel(viewModel)
    return { config, analysis, viewModel, scene }
  }

  // All traffic links (row + column + bus) in one array, for the
  // "at least one link of type X exists" assertions.
  const allLinks = (scene: ReturnType<typeof buildTopologySceneModel>) =>
    scene.rowLinks.concat(scene.columnLinks, scene.busLinks)

  it('groups nodes into racks using cluster metadata', () => {
    const { config, scene } = buildScenario('trinity-pretraining')
    expect(scene.pods.length).toBeGreaterThan(1)
    expect(scene.nodes).toHaveLength(config.cluster.numNodes)
    expect(scene.objectCounts.gpus).toBe(config.cluster.numNodes * config.cluster.gpusPerNode)
    expect(scene.lodPolicy.maxScale).toBeGreaterThan(100)
  })

  it('describes GPUs with analysis-backed shard indices, including EP and FSDP', () => {
    const { viewModel, scene } = buildScenario('trinity-pretraining')
    // Pick any GPU with allocated memory; the preset guarantees at least one.
    const gpu = scene.nodes.flatMap((node) => node.gpus).find((item) => item.memoryUsedGB > 0)
    if (!gpu) {
      throw new Error('expected at least one allocated gpu')
    }
    const details = describeTarget(scene, viewModel, { kind: 'gpu', id: gpu.id })
    expect(details?.metrics.some((metric) => metric.label === 'Expert lane')).toBe(true)
    expect(details?.metrics.some((metric) => metric.label === 'FSDP rank')).toBe(true)
  })

  it('keeps the scene renderable for infeasible configurations', () => {
    const { analysis, viewModel, scene } = buildScenario('infeasible-memory')
    expect(analysis.feasible).toBe(false)
    expect(scene.nodes.length).toBeGreaterThan(0)
    expect(viewModel.warnings[0]).toContain('exceeding')
  })

  it('exposes EP traffic links in the Trinity preset', () => {
    const { scene } = buildScenario('trinity-pretraining')
    expect(allLinks(scene).some((link) => link.trafficType === 'ep')).toBe(true)
  })

  it('exposes CP traffic links in the OLMo long-context preset', () => {
    const { scene } = buildScenario('olmo-long-context')
    expect(allLinks(scene).some((link) => link.trafficType === 'cp')).toBe(true)
  })

  it('collapses cross-rack links to rack centers instead of drawing node-to-node lines across racks', () => {
    const { scene } = buildScenario('llama-pretraining')
    // Deliberately excludes busLinks: only row/column links carry rack-scope
    // infiniband edges in this assertion.
    const rackLink = scene.rowLinks
      .concat(scene.columnLinks)
      .find((link) => link.scope === 'rack' && link.transport === 'infiniband')
    expect(rackLink).toBeDefined()
    // Both endpoints of a rack-scope link must coincide with a pod center.
    expect(scene.pods.some((pod) => pod.centerX === rackLink?.x1 && pod.centerY === rackLink?.y1)).toBe(true)
    expect(scene.pods.some((pod) => pod.centerX === rackLink?.x2 && pod.centerY === rackLink?.y2)).toBe(true)
  })

  it('keeps stable focus and lod frames for every gpu', () => {
    const { scene } = buildScenario('llama-pretraining')
    const gpus = scene.nodes.flatMap((node) => node.gpus)
    expect(gpus.length).toBe(scene.objectCounts.gpus)
    // lodFrame tracks the gpu's own size exactly; focusFrame may only grow.
    expect(gpus.every((gpu) => gpu.focusFrame.width >= gpu.width && gpu.lodFrame.width === gpu.width)).toBe(true)
    expect(gpus.every((gpu) => gpu.focusFrame.height >= gpu.height && gpu.lodFrame.height === gpu.height)).toBe(true)
  })
})