// illustrated-cluster/src/components/ControlsPanel.tsx
// joeddav — Publish WIP HF Space snapshot (1f77aa7)
import {
applyGpuPreset,
applyExamplePhase,
applyExamplePreset,
getExamplePresetOptions,
getFactorOptions,
getGpuPresetId,
getGpuPresetOptions,
getPhaseOptions,
type WorkbenchConfig,
} from '../lib/workbench'
import { type WorkbenchViewModel } from '../lib/workbenchPresenter'
/** Props accepted by the top-level `ControlsPanel` component. */
type ControlsPanelProps = {
  /** Current workbench configuration; every control renders from this snapshot. */
  config: WorkbenchConfig
  /** Receives the complete next configuration whenever a control is edited. */
  onChange: (next: WorkbenchConfig) => void
  /** Invoked when the "reset" chip in the panel header is clicked. */
  onReset: () => void
  /** Presenter output; the panel reads the GPU total and the derived DP degree from it. */
  viewModel: WorkbenchViewModel
}
/** Props for `OptionStrip`, a card that offers a fixed set of numeric choices. */
type OptionStripProps = {
  /** Card heading; also used as the accessible name of the chip group. */
  label: string
  /** Short description rendered under the heading. */
  caption: string
  /** Currently selected option; shown in the badge with an "x" suffix. */
  value: number
  /** Choices rendered as chips, in the order given. */
  options: number[]
  /** Called with the option whose chip was clicked. */
  onSelect: (value: number) => void
}
/** Props for `SelectField`, a labelled drop-down over string values. */
type SelectFieldProps = {
  /** Caption rendered next to the drop-down. */
  label: string
  /** Value of the currently selected option. */
  value: string
  /** Entries to offer; `value` is reported to `onChange`, `label` is displayed. */
  options: { value: string; label: string }[]
  /** Called with the `value` of the option the user picked. */
  onChange: (value: string) => void
}
/** Props for `NumberField`, a labelled numeric input. */
type NumberFieldProps = {
  /** Caption rendered next to the input. */
  label: string
  /** Number currently shown in the input. */
  value: number
  /** Lower bound; the field never reports a value below it. */
  min: number
  /** Increment used by the input's stepper; `NumberField` falls back to 1 when omitted. */
  step?: number
  /** Called with the sanitized number after each edit. */
  onChange: (value: number) => void
}
/** Props for `ToggleField`, a labelled checkbox. */
type ToggleFieldProps = {
  /** Text rendered after the checkbox. */
  label: string
  /** Whether the checkbox is currently ticked. */
  checked: boolean
  /** Called with the new checked state after the user toggles the box. */
  onChange: (checked: boolean) => void
}
/**
 * Card with a heading, caption, a badge showing the current value, and one
 * chip button per option.
 *
 * The chip whose option equals `value` gets the `active` class and is exposed
 * to assistive technology through `aria-pressed`, so the selection is not
 * conveyed by styling alone.
 *
 * @param label - Card heading and accessible name of the chip group.
 * @param caption - Short description rendered under the heading.
 * @param value - Currently selected option; rendered in the badge as `{value}x`.
 * @param options - Choices to render, in order.
 * @param onSelect - Called with the option of the chip that was clicked.
 */
function OptionStrip({ label, caption, value, options, onSelect }: OptionStripProps) {
  return (
    <div className="control-card">
      <div className="control-card-header">
        <div>
          <h3>{label}</h3>
          <p>{caption}</p>
        </div>
        <span className="control-badge">{value}x</span>
      </div>
      <div className="option-strip" role="group" aria-label={label}>
        {options.map((option) => {
          const isActive = option === value
          return (
            <button
              key={option}
              type="button"
              className={isActive ? 'option-chip active' : 'option-chip'}
              // Mirrors the visual "active" state for screen readers.
              aria-pressed={isActive}
              onClick={() => onSelect(option)}
            >
              {option}
            </button>
          )
        })}
      </div>
    </div>
  )
}
/**
 * Labelled, controlled `<select>`.
 *
 * Renders one `<option>` per entry in `options` and forwards the picked
 * option's value string to `onChange`.
 */
function SelectField({ label, value, options, onChange }: SelectFieldProps) {
  const optionElements = options.map(({ value: optionValue, label: optionLabel }) => (
    <option key={optionValue} value={optionValue}>
      {optionLabel}
    </option>
  ))

  return (
    <label className="control-field">
      <span>{label}</span>
      <select value={value} onChange={({ target }) => onChange(target.value)}>
        {optionElements}
      </select>
    </label>
  )
}
/**
 * Labelled, controlled numeric input that reports a sanitized number on every
 * edit.
 *
 * Sanitizing rules applied to the raw input text:
 * - blank or non-numeric text (and non-finite results) fall back to `min`;
 * - any other number is clamped so it is never below `min`.
 *
 * A typed `0` is treated as a real value rather than as "missing", so callers
 * with a negative `min` receive `0` instead of `min`.
 *
 * NOTE(review): because the value is clamped on every keystroke, a field with
 * a large `min` snaps back while the user is still typing. Fixing that needs
 * local draft state and is out of scope here.
 *
 * @param label - Caption rendered next to the input.
 * @param value - Number currently shown.
 * @param min - Lower bound and fallback value.
 * @param step - Stepper increment; defaults to 1.
 * @param onChange - Called with the sanitized number.
 */
function NumberField({ label, value, min, step = 1, onChange }: NumberFieldProps) {
  const handleRawInput = (raw: string) => {
    // Number('') is 0, so blank input has to be detected before parsing.
    const parsed = raw.trim() === '' ? Number.NaN : Number(raw)
    onChange(Number.isFinite(parsed) ? Math.max(min, parsed) : min)
  }

  return (
    <label className="control-field">
      <span>{label}</span>
      <input
        type="number"
        min={min}
        step={step}
        value={value}
        onChange={(event) => handleRawInput(event.target.value)}
      />
    </label>
  )
}
/**
 * Labelled, controlled checkbox.
 *
 * The checkbox is rendered before its label text, and `onChange` receives the
 * box's new checked state.
 */
function ToggleField({ label, checked, onChange }: ToggleFieldProps) {
  const checkbox = (
    <input
      type="checkbox"
      checked={checked}
      onChange={({ target }) => onChange(target.checked)}
    />
  )

  return (
    <label className="control-toggle">
      {checkbox}
      <span>{label}</span>
    </label>
  )
}
/** Choices for the model "Architecture" select. */
const ARCHITECTURE_OPTIONS = [
  { value: 'dense', label: 'dense' },
  { value: 'moe', label: 'moe' },
]

/** Choices for the "Attention profile" select. */
const ATTENTION_PROFILE_OPTIONS = [
  { value: 'full', label: 'full attention' },
  { value: 'hybrid', label: 'hybrid attention' },
]

/** Choices for the training "Precision" select. */
const PRECISION_OPTIONS = [
  { value: 'bf16', label: 'bf16' },
  { value: 'fp16', label: 'fp16' },
  { value: 'fp8', label: 'fp8' },
  { value: 'fp32', label: 'fp32' },
]

/** Choices for the training "Optimizer" select. */
const OPTIMIZER_OPTIONS = [
  { value: 'adamw', label: 'adamw' },
  { value: 'adam', label: 'adam' },
  { value: 'muon', label: 'muon' },
  { value: 'sgd', label: 'sgd' },
]

/** Stages offered by the "ZeRO" option strip. */
const ZERO_STAGE_OPTIONS = [0, 1, 2, 3]

/** Extra entry appended after the built-in GPU presets. */
const CUSTOM_GPU_OPTION = { value: 'custom', label: 'custom GPU' }

/** Sliding-window size used when a hybrid attention profile does not specify one. */
const DEFAULT_SLIDING_WINDOW = 4096

/** Adapts an `{ id, label }` preset/phase entry to the shape `SelectField` expects. */
const toSelectOption = (option: { id: string; label: string }) => ({
  value: option.id,
  label: option.label,
})

/** Optional numeric settings are stored as `undefined` rather than as 0 or a negative number. */
const positiveOrUndefined = (value: number) => (value > 0 ? value : undefined)

/**
 * Control surface for the workbench: example presets, model, cluster, training
 * and parallelism settings.
 *
 * The component holds no state of its own. Every edit builds a complete next
 * `WorkbenchConfig` from the current `config` and hands it to `onChange`.
 * Manual field edits additionally set `customized: true`; preset, phase and
 * GPU-preset changes are delegated to the `apply*` helpers from
 * `../lib/workbench`.
 *
 * @param config - Current configuration to render.
 * @param onChange - Receives the next configuration after any edit.
 * @param onReset - Invoked by the "reset" chip in the header.
 * @param viewModel - Supplies the cluster GPU total and the derived DP degree.
 */
export function ControlsPanel({
  config,
  onChange,
  onReset,
  viewModel,
}: ControlsPanelProps) {
  const { model, cluster, training, parallelism } = config
  const { gpuType } = cluster
  const derivedDp = viewModel.analysis.derivedParallelism.dp

  // TP and EP are offered relative to one node, PP and CP relative to the whole
  // cluster. The current value is passed along as the second argument.
  // NOTE(review): presumably so it stays selectable — confirm in getFactorOptions.
  const totalGPUs = cluster.gpusPerNode * cluster.numNodes
  const factorOptions = {
    tp: getFactorOptions(cluster.gpusPerNode, parallelism.tp),
    pp: getFactorOptions(totalGPUs, parallelism.pp),
    cp: getFactorOptions(totalGPUs, parallelism.cp),
    ep: getFactorOptions(cluster.gpusPerNode, parallelism.ep),
  }
  const gpuPresetId = getGpuPresetId(gpuType)

  // Narrow once here so the field handlers below need no non-null assertions.
  const hybridProfile =
    model.attentionProfile?.type === 'hybrid' ? model.attentionProfile : undefined
  const moe = model.architecture === 'moe' ? model.moe : undefined

  /** Replaces one training setting and marks the config as customized. */
  const updateTraining = <K extends keyof WorkbenchConfig['training']>(
    key: K,
    value: WorkbenchConfig['training'][K],
  ) => {
    onChange({
      ...config,
      customized: true,
      training: { ...training, [key]: value },
    })
  }

  /** Replaces one model setting and marks the config as customized. */
  const updateModel = <K extends keyof WorkbenchConfig['model']>(
    key: K,
    value: WorkbenchConfig['model'][K],
  ) => {
    onChange({
      ...config,
      customized: true,
      model: { ...model, [key]: value },
    })
  }

  /** Replaces one cluster setting and marks the config as customized. */
  const updateCluster = <K extends keyof WorkbenchConfig['cluster']>(
    key: K,
    value: WorkbenchConfig['cluster'][K],
  ) => {
    onChange({
      ...config,
      customized: true,
      cluster: { ...cluster, [key]: value },
    })
  }

  /** Replaces one parallelism setting and marks the config as customized. */
  const updateParallelism = <K extends keyof WorkbenchConfig['parallelism']>(
    key: K,
    value: WorkbenchConfig['parallelism'][K],
  ) => {
    onChange({
      ...config,
      customized: true,
      parallelism: { ...parallelism, [key]: value },
    })
  }

  /** Replaces a single field of the GPU spec while keeping the others. */
  const updateGpuSpec = <K extends keyof WorkbenchConfig['cluster']['gpuType']>(
    key: K,
    value: WorkbenchConfig['cluster']['gpuType'][K],
  ) => {
    updateCluster('gpuType', { ...gpuType, [key]: value })
  }

  const updateAttentionProfile = (
    nextAttentionProfile: WorkbenchConfig['model']['attentionProfile'],
  ) => {
    updateModel('attentionProfile', nextAttentionProfile)
  }

  const updateMoe = (nextMoe: NonNullable<WorkbenchConfig['model']['moe']> | undefined) => {
    updateModel('moe', nextMoe)
  }

  /**
   * Switches the architecture. Moving to 'moe' keeps existing MoE settings or
   * seeds defaults derived from the current model; moving away clears them.
   */
  const handleArchitectureChange = (value: string) => {
    const architecture = value as WorkbenchConfig['model']['architecture']
    onChange({
      ...config,
      customized: true,
      model: {
        ...model,
        architecture,
        moe:
          architecture === 'moe'
            ? (model.moe ?? {
                numExperts: 64,
                expertsPerToken: 2,
                numDenseLayers: Math.min(model.numLayers, 4),
                expertIntermediateSize: Math.max(1024, model.hiddenDim),
                activeParamsPerToken: undefined,
              })
            : undefined,
      },
    })
  }

  /** Switches between full and hybrid attention, carrying over any hybrid settings. */
  const handleAttentionProfileChange = (value: string) => {
    if (value === 'full') {
      updateAttentionProfile({ type: 'full' })
      return
    }
    updateAttentionProfile({
      type: 'hybrid',
      slidingWindowSize: model.attentionProfile?.slidingWindowSize ?? DEFAULT_SLIDING_WINDOW,
      globalAttentionFraction: model.attentionProfile?.globalAttentionFraction,
      globalAttentionEveryN: model.attentionProfile?.globalAttentionEveryN,
    })
  }

  /** Applies a built-in GPU preset; picking the "custom" entry changes nothing. */
  const handleGpuPresetChange = (value: string) => {
    if (value === 'custom') {
      return
    }
    onChange(applyGpuPreset(config, value as Parameters<typeof applyGpuPreset>[1]))
  }

  return (
    <section className="controls-band">
      <div className="controls-head">
        <div>
          <p className="mini-label">Controls</p>
          <h2>Example / model / cluster / training / parallelism</h2>
        </div>
        <div className="controls-meta">
          <span>{viewModel.analysis.totalGPUs.toLocaleString()} GPUs in cluster</span>
          <span>
            {parallelism.tp}×{parallelism.pp}×{parallelism.cp}×{parallelism.ep} · DP {derivedDp}
          </span>
          <button type="button" className="reset-chip" onClick={onReset}>
            reset
          </button>
        </div>
      </div>

      <div className="controls-stack">
        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Examples</h3>
              <p>Built-in frontier runs as starting points for custom model and cluster edits</p>
            </div>
            {config.customized ? <span className="control-badge">customized</span> : null}
          </div>
          <div className="field-grid">
            <SelectField
              label="Example preset"
              value={config.examplePresetId}
              options={getExamplePresetOptions().map(toSelectOption)}
              onChange={(value) =>
                onChange(applyExamplePreset(config, value as typeof config.examplePresetId))
              }
            />
            <SelectField
              label="Phase"
              value={config.phaseId}
              options={getPhaseOptions(config.examplePresetId).map(toSelectOption)}
              onChange={(value) =>
                onChange(applyExamplePhase(config, value as typeof config.phaseId))
              }
            />
          </div>
        </div>

        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Model</h3>
              <p>Core architecture dimensions, attention layout, and optional MoE settings</p>
            </div>
          </div>
          <div className="field-grid field-grid-wide">
            <SelectField
              label="Architecture"
              value={model.architecture}
              options={ARCHITECTURE_OPTIONS}
              onChange={handleArchitectureChange}
            />
            <NumberField
              label="Hidden dim"
              value={model.hiddenDim}
              min={128}
              step={128}
              onChange={(value) => updateModel('hiddenDim', value)}
            />
            <NumberField
              label="Layers"
              value={model.numLayers}
              min={1}
              onChange={(value) => updateModel('numLayers', value)}
            />
            <NumberField
              label="Attention heads"
              value={model.numHeads}
              min={1}
              onChange={(value) => updateModel('numHeads', value)}
            />
            <NumberField
              label="KV heads"
              value={model.numKVHeads}
              min={1}
              onChange={(value) => updateModel('numKVHeads', value)}
            />
            <NumberField
              label="Vocab size"
              value={model.vocabSize}
              min={256}
              onChange={(value) => updateModel('vocabSize', value)}
            />
            <NumberField
              label="Intermediate size"
              value={model.intermediateSize}
              min={256}
              onChange={(value) => updateModel('intermediateSize', value)}
            />
            <SelectField
              label="Attention profile"
              value={model.attentionProfile?.type ?? 'full'}
              options={ATTENTION_PROFILE_OPTIONS}
              onChange={handleAttentionProfileChange}
            />
            <div className="control-field control-field-toggle">
              <span>Tied embeddings</span>
              <ToggleField
                label="share output head"
                checked={model.tiedEmbeddings}
                onChange={(value) => updateModel('tiedEmbeddings', value)}
              />
            </div>
            {hybridProfile ? (
              <>
                <NumberField
                  label="Sliding window"
                  value={hybridProfile.slidingWindowSize ?? DEFAULT_SLIDING_WINDOW}
                  min={0}
                  step={256}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...hybridProfile,
                      type: 'hybrid',
                      slidingWindowSize: positiveOrUndefined(value),
                    })
                  }
                />
                <NumberField
                  label="Global attention fraction"
                  value={hybridProfile.globalAttentionFraction ?? 0}
                  min={0}
                  step={0.05}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...hybridProfile,
                      type: 'hybrid',
                      globalAttentionFraction: positiveOrUndefined(value),
                    })
                  }
                />
                <NumberField
                  label="Global every N"
                  value={hybridProfile.globalAttentionEveryN ?? 0}
                  min={0}
                  onChange={(value) =>
                    updateAttentionProfile({
                      ...hybridProfile,
                      type: 'hybrid',
                      globalAttentionEveryN: positiveOrUndefined(value),
                    })
                  }
                />
              </>
            ) : null}
            {moe ? (
              <>
                <NumberField
                  label="Experts"
                  value={moe.numExperts}
                  min={1}
                  onChange={(value) => updateMoe({ ...moe, numExperts: value })}
                />
                <NumberField
                  label="Experts per token"
                  value={moe.expertsPerToken}
                  min={1}
                  onChange={(value) => updateMoe({ ...moe, expertsPerToken: value })}
                />
                <NumberField
                  label="Dense layers"
                  value={moe.numDenseLayers}
                  min={0}
                  onChange={(value) => updateMoe({ ...moe, numDenseLayers: value })}
                />
                <NumberField
                  label="Expert intermediate"
                  value={moe.expertIntermediateSize}
                  min={256}
                  onChange={(value) => updateMoe({ ...moe, expertIntermediateSize: value })}
                />
                <NumberField
                  label="Active params / token"
                  value={moe.activeParamsPerToken ?? 0}
                  min={0}
                  step={1000000}
                  onChange={(value) =>
                    updateMoe({ ...moe, activeParamsPerToken: positiveOrUndefined(value) })
                  }
                />
              </>
            ) : null}
          </div>
        </div>

        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Cluster</h3>
              <p>GPU spec, node count, and rack-local topology for the physical fabric</p>
            </div>
          </div>
          <div className="field-grid field-grid-wide">
            <SelectField
              label="GPU preset"
              value={gpuPresetId}
              options={[...getGpuPresetOptions().map(toSelectOption), CUSTOM_GPU_OPTION]}
              onChange={handleGpuPresetChange}
            />
            <label className="control-field">
              <span>GPU name</span>
              <input
                type="text"
                value={gpuType.name}
                onChange={(event) => updateGpuSpec('name', event.target.value)}
              />
            </label>
            <NumberField
              label="HBM capacity (GB)"
              value={gpuType.hbmCapacityGB}
              min={1}
              onChange={(value) => updateGpuSpec('hbmCapacityGB', value)}
            />
            <NumberField
              label="Peak BF16 TFLOPs"
              value={gpuType.peakTFLOPsBF16}
              min={1}
              onChange={(value) => updateGpuSpec('peakTFLOPsBF16', value)}
            />
            <NumberField
              label="HBM bandwidth (TB/s)"
              value={gpuType.memBandwidthTBs}
              min={0.1}
              step={0.05}
              onChange={(value) => updateGpuSpec('memBandwidthTBs', value)}
            />
            <NumberField
              label="GPUs per node"
              value={cluster.gpusPerNode}
              min={1}
              onChange={(value) => updateCluster('gpusPerNode', value)}
            />
            <NumberField
              label="Nodes"
              value={cluster.numNodes}
              min={1}
              onChange={(value) => updateCluster('numNodes', value)}
            />
            <NumberField
              label="Nodes per rack"
              // With no rack size configured, the field shows the node count.
              value={cluster.nodesPerRack ?? cluster.numNodes}
              min={1}
              onChange={(value) => updateCluster('nodesPerRack', value)}
            />
            <NumberField
              label="Intra-node bandwidth (GB/s)"
              value={cluster.intraNodeBandwidthGBs}
              min={1}
              onChange={(value) => updateCluster('intraNodeBandwidthGBs', value)}
            />
            <NumberField
              label="Inter-node bandwidth (GB/s)"
              value={cluster.interNodeBandwidthGBs}
              min={1}
              onChange={(value) => updateCluster('interNodeBandwidthGBs', value)}
            />
          </div>
        </div>

        <div className="control-card">
          <div className="control-card-header">
            <div>
              <h3>Training</h3>
              <p>Batching, precision, optimizer state, and recompute</p>
            </div>
          </div>
          <div className="field-grid field-grid-wide">
            <NumberField
              label="Micro-batch"
              value={training.microBatchSize}
              min={1}
              onChange={(value) => updateTraining('microBatchSize', value)}
            />
            <NumberField
              label="Seq length"
              value={training.seqLength}
              min={256}
              step={256}
              onChange={(value) => updateTraining('seqLength', value)}
            />
            <NumberField
              label="Grad accum"
              value={training.gradAccumSteps}
              min={1}
              onChange={(value) => updateTraining('gradAccumSteps', value)}
            />
            <SelectField
              label="Precision"
              value={training.precision}
              options={PRECISION_OPTIONS}
              onChange={(value) =>
                updateTraining('precision', value as WorkbenchConfig['training']['precision'])
              }
            />
            <SelectField
              label="Optimizer"
              value={training.optimizer}
              options={OPTIMIZER_OPTIONS}
              onChange={(value) =>
                updateTraining('optimizer', value as WorkbenchConfig['training']['optimizer'])
              }
            />
            <div className="control-field control-field-toggle">
              <span>Activation recompute</span>
              <ToggleField
                label="checkpointing enabled"
                checked={training.activationCheckpointing}
                onChange={(value) => updateTraining('activationCheckpointing', value)}
              />
            </div>
          </div>
        </div>

        <div className="controls-grid controls-grid-parallelism">
          <OptionStrip
            label="TP"
            caption="tensor shards"
            value={parallelism.tp}
            options={factorOptions.tp}
            onSelect={(value) => updateParallelism('tp', value)}
          />
          <OptionStrip
            label="PP"
            caption="pipeline stages"
            value={parallelism.pp}
            options={factorOptions.pp}
            onSelect={(value) => updateParallelism('pp', value)}
          />
          <OptionStrip
            label="CP"
            caption="context shards"
            value={parallelism.cp}
            options={factorOptions.cp}
            onSelect={(value) => updateParallelism('cp', value)}
          />
          <OptionStrip
            label="EP"
            caption="expert lanes"
            value={parallelism.ep}
            options={factorOptions.ep}
            onSelect={(value) => updateParallelism('ep', value)}
          />
          <OptionStrip
            label="ZeRO"
            caption="state sharding"
            value={parallelism.zeroStage}
            options={ZERO_STAGE_OPTIONS}
            onSelect={(value) => updateParallelism('zeroStage', value as 0 | 1 | 2 | 3)}
          />
          <div className="control-card">
            <div className="control-card-header">
              <div>
                <h3>Megatron-style derived DP</h3>
                <p>DP is derived from cluster size and the other parallel axes</p>
              </div>
              <span className="control-badge">{derivedDp}x</span>
            </div>
            <div className="field-grid">
              <NumberField
                label="FSDP shard group"
                value={parallelism.fsdpShardGroupSize}
                min={0}
                step={8}
                onChange={(value) => updateParallelism('fsdpShardGroupSize', value)}
              />
              <div className="control-field control-field-toggle">
                <span>Distributed optimizer</span>
                <ToggleField
                  label="optimizer state sharding"
                  checked={parallelism.distributedOptimizer}
                  onChange={(value) => updateParallelism('distributedOptimizer', value)}
                />
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>
  )
}