// NOTE(review): This module appears to be a corrupted extraction of a React/TSX
// source file. Everything that sat inside angle brackets — JSX element tags and
// generic type-parameter lists — was stripped, and newlines within declarations
// were collapsed, so this text is not compilable TypeScript. The comments added
// throughout only annotate what is still visible; recover the original file from
// version control instead of editing this text directly.
//
// The collapsed line below originally held separate declarations:
//   - named imports from '../lib/workbench' (preset/phase/option helpers plus the
//     WorkbenchConfig type) and WorkbenchViewModel from '../lib/workbenchPresenter'
//   - prop type aliases: ControlsPanelProps, OptionStripProps, SelectFieldProps,
//     NumberFieldProps, ToggleFieldProps (the newlines separating their members
//     were lost, which is why members now run together)
//   - the header of the OptionStrip component, whose returned markup follows
import { applyGpuPreset, applyExamplePhase, applyExamplePreset, getExamplePresetOptions, getFactorOptions, getGpuPresetId, getGpuPresetOptions, getPhaseOptions, type WorkbenchConfig, } from '../lib/workbench' import { type WorkbenchViewModel } from '../lib/workbenchPresenter' type ControlsPanelProps = { config: WorkbenchConfig onChange: (next: WorkbenchConfig) => void onReset: () => void viewModel: WorkbenchViewModel } type OptionStripProps = { label: string caption: string value: number options: number[] onSelect: (value: number) => void } type SelectFieldProps = { label: string value: string options: Array<{ value: string; label: string }> onChange: (value: string) => void } type NumberFieldProps = { label: string value: number min: number step?: number onChange: (value: number) => void } type ToggleFieldProps = { label: string checked: boolean onChange: (checked: boolean) => void } function OptionStrip({ label, caption, value, options, onSelect }: OptionStripProps) { return (
// NOTE(review): the fragments below are orphaned JSX expression children of
// OptionStrip's return value; the enclosing element tags were stripped.

{label}

{caption}

{value}x
// NOTE(review): the element rendered per option inside this .map() was lost —
// only its surrounding parentheses remain. Presumably an option button that
// calls onSelect(option); confirm against version control.
{options.map((option) => ( ))}
// NOTE(review): this single collapsed line contains several units at once:
//   - the close of OptionStrip, then the SelectField, NumberField and
//     ToggleField components — each `return ( )` lost its JSX markup entirely;
//   - the head of ControlsPanel: totalGPUs derived as gpusPerNode * numNodes;
//     per-axis factor options via getFactorOptions (tp/ep bounded by
//     gpusPerNode, pp/cp by totalGPUs); gpuPresetId via getGpuPresetId;
//   - immutable update helpers (updateTraining / updateModel / updateCluster /
//     updateAttentionProfile / updateMoe / updateParallelism), each calling
//     onChange with a spread-copied config, `customized: true`, and the
//     updated section. Their generic parameter lists (presumably
//     `<K extends keyof WorkbenchConfig['training']>` etc.) were stripped
//     along with the JSX, which is why `( key: K, ... )` now references an
//     undeclared K — TODO confirm against version control;
//   - updateMoe's parameter likewise lost its type argument: it now reads
//     `NonNullable | undefined`, presumably
//     NonNullable<WorkbenchConfig['model']['moe']> | undefined.
) } function SelectField({ label, value, options, onChange }: SelectFieldProps) { return ( ) } function NumberField({ label, value, min, step = 1, onChange }: NumberFieldProps) { return ( ) } function ToggleField({ label, checked, onChange }: ToggleFieldProps) { return ( ) } export function ControlsPanel({ config, onChange, onReset, viewModel, }: ControlsPanelProps) { const totalGPUs = config.cluster.gpusPerNode * config.cluster.numNodes const factorOptions = { tp: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.tp), pp: getFactorOptions(totalGPUs, config.parallelism.pp), cp: getFactorOptions(totalGPUs, config.parallelism.cp), ep: getFactorOptions(config.cluster.gpusPerNode, config.parallelism.ep), } const gpuPresetId = getGpuPresetId(config.cluster.gpuType) const updateTraining = ( key: K, value: WorkbenchConfig['training'][K], ) => { onChange({ ...config, customized: true, training: { ...config.training, [key]: value, }, }) } const updateModel = ( key: K, value: WorkbenchConfig['model'][K], ) => { onChange({ ...config, customized: true, model: { ...config.model, [key]: value, }, }) } const updateCluster = ( key: K, value: WorkbenchConfig['cluster'][K], ) => { onChange({ ...config, customized: true, cluster: { ...config.cluster, [key]: value, }, }) } const updateAttentionProfile = ( nextAttentionProfile: WorkbenchConfig['model']['attentionProfile'], ) => { updateModel('attentionProfile', nextAttentionProfile) } const updateMoe = (nextMoe: NonNullable | undefined) => { updateModel('moe', nextMoe) } const updateParallelism = ( key: K, value: WorkbenchConfig['parallelism'][K], ) => { onChange({ ...config, customized: true, parallelism: { ...config.parallelism, [key]: value, }, }) } return (

// NOTE(review): remnants of the ControlsPanel header — a title, a caption, and
// a summary interpolating the total GPU count plus the tp×pp×cp×ep layout and
// the derived DP value from viewModel.analysis.
Controls

Example / model / cluster / training / parallelism

{viewModel.analysis.totalGPUs.toLocaleString()} GPUs in cluster {config.parallelism.tp}×{config.parallelism.pp}×{config.parallelism.cp}× {config.parallelism.ep} · DP {viewModel.analysis.derivedParallelism.dp}

// NOTE(review): Examples section. The conditional below originally wrapped the
// word "customized" in an element (tags stripped), rendered only while
// config.customized is set.
Examples

Built-in frontier runs as starting points for custom model and cluster edits

{config.customized ? customized : null}
// NOTE(review): two select-like controls survive only as attribute fragments —
// option mappers plus onChange handlers applying applyExamplePreset and
// applyExamplePhase; their element tags and the heads of the option-building
// expressions (presumably getExamplePresetOptions / getPhaseOptions calls)
// were stripped.
({ value: option.id, label: option.label, }))} onChange={(value) => onChange(applyExamplePreset(config, value as typeof config.examplePresetId)) } /> ({ value: option.id, label: option.label, }))} onChange={(value) => onChange(applyExamplePhase(config, value as typeof config.phaseId))} />

// NOTE(review): Model section remnants. The first large fragment below is the
// architecture selector's handler: it switches architecture and, when 'moe' is
// chosen, seeds config.model.moe with defaults (numExperts 64, expertsPerToken
// 2, numDenseLayers min(numLayers, 4), expertIntermediateSize max(1024,
// hiddenDim)); otherwise it clears moe. It is followed by NumberField-style
// fragments for hiddenDim / numLayers / numHeads / numKVHeads / vocabSize /
// intermediateSize, and an attention-profile selector toggling between
// { type: 'full' } and a 'hybrid' profile that carries over any existing
// slidingWindowSize (default 4096), globalAttentionFraction and
// globalAttentionEveryN. The surrounding element tags were stripped.
Model

Core architecture dimensions, attention layout, and optional MoE settings

{ const architecture = value as WorkbenchConfig['model']['architecture'] onChange({ ...config, customized: true, model: { ...config.model, architecture, moe: architecture === 'moe' ? (config.model.moe ?? { numExperts: 64, expertsPerToken: 2, numDenseLayers: Math.min(config.model.numLayers, 4), expertIntermediateSize: Math.max(1024, config.model.hiddenDim), activeParamsPerToken: undefined, }) : undefined, }, }) }} /> updateModel('hiddenDim', value)} /> updateModel('numLayers', value)} /> updateModel('numHeads', value)} /> updateModel('numKVHeads', value)} /> updateModel('vocabSize', value)} /> updateModel('intermediateSize', value)} /> { if (value === 'full') { updateAttentionProfile({ type: 'full' }) return } updateAttentionProfile({ type: 'hybrid', slidingWindowSize: config.model.attentionProfile?.slidingWindowSize ?? 4096, globalAttentionFraction: config.model.attentionProfile?.globalAttentionFraction, globalAttentionEveryN: config.model.attentionProfile?.globalAttentionEveryN, }) }} />
// NOTE(review): ToggleField fragment for model.tiedEmbeddings; the element tag
// and prop names were stripped, leaving label text and a handler tail.
Tied embeddings updateModel('tiedEmbeddings', value)} />
// NOTE(review): two conditional groups survive below (the `<>` fragments did):
// hybrid-attention numeric fields (slidingWindowSize / globalAttentionFraction
// / globalAttentionEveryN, each treating value <= 0 as "unset") shown when
// attentionProfile?.type === 'hybrid', and MoE numeric fields (numExperts /
// expertsPerToken / numDenseLayers / expertIntermediateSize /
// activeParamsPerToken) shown when architecture === 'moe' && model.moe.
{config.model.attentionProfile?.type === 'hybrid' ? ( <> updateAttentionProfile({ ...config.model.attentionProfile, type: 'hybrid', slidingWindowSize: value > 0 ? value : undefined, }) } /> updateAttentionProfile({ ...config.model.attentionProfile, type: 'hybrid', globalAttentionFraction: value > 0 ? value : undefined, }) } /> updateAttentionProfile({ ...config.model.attentionProfile, type: 'hybrid', globalAttentionEveryN: value > 0 ? value : undefined, }) } /> ) : null} {config.model.architecture === 'moe' && config.model.moe ? ( <> updateMoe({ ...config.model.moe!, numExperts: value })} /> updateMoe({ ...config.model.moe!, expertsPerToken: value }) } /> updateMoe({ ...config.model.moe!, numDenseLayers: value })} /> updateMoe({ ...config.model.moe!, expertIntermediateSize: value }) } /> updateMoe({ ...config.model.moe!, activeParamsPerToken: value > 0 ? value : undefined, }) } /> ) : null}

// NOTE(review): Cluster section remnants: a GPU preset selector (options built
// from preset ids plus a non-applying 'custom GPU' sentinel that early-returns;
// other values go through applyGpuPreset), three gpuType numeric fields
// (hbmCapacityGB / peakTFLOPsBF16 / memBandwidthTBs, each spread-updating
// cluster.gpuType), and plain cluster numeric fields (gpusPerNode / numNodes /
// nodesPerRack / intraNodeBandwidthGBs / interNodeBandwidthGBs). Element tags
// were stripped; `Parameters[1]` below presumably lost a type argument such as
// Parameters<typeof applyGpuPreset>[1] — TODO confirm.
Cluster

GPU spec, node count, and rack-local topology for the physical fabric

({ value: option.id, label: option.label, })), { value: 'custom', label: 'custom GPU' }, ]} onChange={(value) => { if (value === 'custom') { return } onChange(applyGpuPreset(config, value as Parameters[1])) }} /> updateCluster('gpuType', { ...config.cluster.gpuType, hbmCapacityGB: value, }) } /> updateCluster('gpuType', { ...config.cluster.gpuType, peakTFLOPsBF16: value, }) } /> updateCluster('gpuType', { ...config.cluster.gpuType, memBandwidthTBs: value, }) } /> updateCluster('gpusPerNode', value)} /> updateCluster('numNodes', value)} /> updateCluster('nodesPerRack', value)} /> updateCluster('intraNodeBandwidthGBs', value)} /> updateCluster('interNodeBandwidthGBs', value)} />

// NOTE(review): Training section remnants: numeric fields for microBatchSize /
// seqLength / gradAccumSteps, selectors casting to the precision and optimizer
// union types, and a ToggleField for activationCheckpointing. Element tags and
// label/prop attributes were stripped, leaving only the handler tails.
Training

Batching, precision, optimizer state, and recompute

updateTraining('microBatchSize', value)} /> updateTraining('seqLength', value)} /> updateTraining('gradAccumSteps', value)} /> updateTraining('precision', value as WorkbenchConfig['training']['precision']) } /> updateTraining('optimizer', value as WorkbenchConfig['training']['optimizer']) } />
Activation recompute updateTraining('activationCheckpointing', value)} />
// NOTE(review): Parallelism section remnants: selectors for tp / pp / cp / ep
// (presumably fed from factorOptions, though the attribute heads were
// stripped) and a zeroStage selector cast to the 0 | 1 | 2 | 3 union.
updateParallelism('tp', value)} /> updateParallelism('pp', value)} /> updateParallelism('cp', value)} /> updateParallelism('ep', value)} /> updateParallelism('zeroStage', value as 0 | 1 | 2 | 3)} />

// NOTE(review): read-only readout of the derived data-parallel degree, plus a
// numeric field for fsdpShardGroupSize and a toggle for distributedOptimizer.
// The onReset prop destructured by ControlsPanel is not visible anywhere in
// this residue — its consumer (presumably a reset button) was stripped with
// the markup; confirm against version control.
Megatron-style derived DP

DP is derived from cluster size and the other parallel axes

{viewModel.analysis.derivedParallelism.dp}x
updateParallelism('fsdpShardGroupSize', value)} />
Distributed optimizer updateParallelism('distributedOptimizer', value)} />
) }