George Yang
Feat: Sync all features from main repository
e9c64c8
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>GPU Memory Calculator for LLM Training</title>
<link rel="stylesheet" href="/static/css/styles.css">
</head>
<body>
<div class="container">
<header>
  <!-- Page banner: application title and scope tagline. -->
  <h1>🚀 GPU Memory Calculator</h1>
  <p class="subtitle">For LLM Training, Inference, and Multi-Node Optimization</p>
</header>
<!-- Tab Navigation -->
<nav class="tab-navigation">
  <!-- One button per tab. The data-tab values line up with the panel ids
       training-tab, inference-tab and multinode-tab. Explicit type="button"
       matches the other action buttons in this file and prevents an
       accidental submit if the nav is ever placed inside a form. -->
  <button class="tab-btn active" data-tab="training" type="button">🎓 Training</button>
  <button class="tab-btn" data-tab="inference" type="button">🚀 Inference</button>
  <button class="tab-btn" data-tab="multinode" type="button">🌐 Multi-Node</button>
</nav>
<div class="main-content">
<!-- Training Tab -->
<div id="training-tab" class="tab-content active">
<!-- Configuration Panel -->
<div class="config-panel">
<h2>Training Configuration</h2>
<!-- Model Settings -->
<section class="config-section">
<h3>Model Settings</h3>
<div class="form-group">
  <!-- Training-tab preset picker, grouped into dense and MoE models, with a
       button labelled for fetching a model from the HuggingFace Hub. -->
  <label for="preset-select">Preset Model:</label>
  <div class="preset-row">
    <select id="preset-select">
      <option value="custom">Custom</option>
      <optgroup label="Dense Models">
        <option value="llama2-7b">LLaMA 2 7B</option>
        <option value="llama2-13b">LLaMA 2 13B</option>
        <option value="llama2-70b">LLaMA 2 70B</option>
        <option value="gpt3-175b">GPT-3 175B</option>
      </optgroup>
      <optgroup label="MoE (Mixture of Experts) Models">
        <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
        <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
        <option value="glm-4-9b">GLM-4 9B (MoE)</option>
        <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
        <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
        <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
      </optgroup>
    </select>
    <button id="fetch-hf-btn" class="btn-tertiary" title="Fetch from HuggingFace Hub" type="button">
      <span>🤗 Fetch from HF</span>
    </button>
  </div>
</div>
<!-- HF Fetch Panel (hidden by default) -->
<div id="hf-fetch-panel" style="display: none;" class="hf-fetch-panel">
  <!-- HuggingFace fetch form. Starts hidden via the inline display:none. -->
  <div class="form-group">
    <label for="hf-model-id">HuggingFace Model ID:</label>
    <input type="text" id="hf-model-id" placeholder="e.g., meta-llama/Llama-2-7b-hf" aria-describedby="hf-model-help">
    <span id="hf-model-help" class="help-text">Enter the HuggingFace model repository ID (e.g., meta-llama/Llama-2-7b-hf)</span>
  </div>
  <div class="form-group">
    <label for="hf-token">HF Token (optional, for private models):</label>
    <input type="password" id="hf-token" placeholder="hf_xxxxxxxxxxxx" aria-describedby="hf-token-help">
    <span id="hf-token-help" class="help-text">Leave empty for public models, provide token for gated/private models</span>
  </div>
  <div class="button-group">
    <button id="hf-fetch-submit" class="btn-primary" type="button">Fetch Model</button>
    <button id="hf-fetch-cancel" class="btn-secondary" type="button">Cancel</button>
  </div>
  <div id="hf-loading" style="display: none;" class="loading-message">
    <p>Fetching model from HuggingFace Hub...</p>
  </div>
  <!-- Status regions. aria-live only accepts "off", "polite" or "assertive";
       a misspelled token is not a valid value, so the regions would not be
       announced as intended. -->
  <div id="hf-error" style="display: none;" class="error-message" aria-live="polite"></div>
  <div id="hf-success" style="display: none;" class="success-message" aria-live="polite"></div>
</div>
<div class="form-grid">
<div class="form-group" data-tooltip="Name of your model">
<label for="model-name" id="label-model-name">Model Name:</label>
<input type="text" id="model-name" value="custom-model" aria-labelledby="label-model-name">
</div>
<div class="form-group" data-tooltip="Total number of parameters (e.g., 7B, 7000M, 7000000000)">
<label for="num-params" id="label-num-params">Parameters:</label>
<input type="text" id="num-params" value="7B" placeholder="e.g., 7B" aria-labelledby="label-num-params" aria-describedby="help-num-params">
<span id="help-num-params" class="sr-only">Enter model size as number with optional suffix: 7B, 7000M, or 7000000000</span>
</div>
<div class="form-group" data-tooltip="Number of transformer layers">
<label for="num-layers" id="label-num-layers">Layers:</label>
<input type="number" id="num-layers" value="32" min="1" aria-labelledby="label-num-layers">
</div>
<div class="form-group" data-tooltip="Hidden dimension size">
<label for="hidden-size" id="label-hidden-size">Hidden Size:</label>
<input type="number" id="hidden-size" value="4096" min="1" aria-labelledby="label-hidden-size">
</div>
<div class="form-group" data-tooltip="Number of attention heads">
<label for="num-heads" id="label-num-heads">Attention Heads:</label>
<input type="number" id="num-heads" value="32" min="1" aria-labelledby="label-num-heads">
</div>
<div class="form-group" data-tooltip="Vocabulary size">
<label for="vocab-size" id="label-vocab-size">Vocab Size:</label>
<input type="number" id="vocab-size" value="32000" min="1" aria-labelledby="label-vocab-size">
</div>
<div class="form-group" data-tooltip="Maximum sequence length">
<label for="seq-len" id="label-seq-len">Max Seq Length:</label>
<input type="number" id="seq-len" value="4096" min="1" aria-labelledby="label-seq-len">
</div>
</div>
</section>
<!-- MoE (Mixture of Experts) Settings -->
<section class="config-section">
<h3>Mixture of Experts (MoE)</h3>
<div class="form-group" data-tooltip="Enable Mixture of Experts architecture">
<label for="moe-enabled">
<input type="checkbox" id="moe-enabled">
Enable MoE
</label>
</div>
<div id="moe-fields" style="display:none;">
<div class="form-grid">
<div class="form-group" data-tooltip="Total number of experts in the model">
<label for="num-experts">Number of Experts:</label>
<input type="number" id="num-experts" value="8" min="1" max="256">
</div>
<div class="form-group" data-tooltip="Number of experts activated per token (top-k routing)">
<label for="top-k">Top-K (active experts):</label>
<input type="number" id="top-k" value="2" min="1" max="8">
</div>
<div class="form-group" data-tooltip="Expert intermediate layer size (default: 4x hidden_size)">
<label for="expert-intermediate-size">Expert Intermediate Size:</label>
<input type="number" id="expert-intermediate-size" value="" placeholder="Auto (4x hidden)" min="1">
</div>
<div class="form-group" data-tooltip="Shared expert intermediate size (for models like GLM)">
<label for="shared-expert-size">Shared Expert Size:</label>
<input type="number" id="shared-expert-size" value="" placeholder="None" min="1">
</div>
</div>
<p class="info-text">With MoE, only <strong><span id="active-experts-display">2</span></strong> of <strong><span id="total-experts-display">8</span></strong> experts are active per token, reducing activation memory.</p>
</div>
</section>
<!-- Training Settings -->
<section class="config-section">
<h3>Training Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="batch-size" data-tooltip="Batch size per GPU">Batch Size:</label>
<input type="number" id="batch-size" value="4" min="1">
<input type="range" id="batch-size-slider" min="1" max="128" value="4">
</div>
<div class="form-group" data-tooltip="Gradient accumulation steps">
<label for="grad-accum">Gradient Accumulation:</label>
<input type="number" id="grad-accum" value="4" min="1">
</div>
<div class="form-group">
<label for="optimizer" data-tooltip="Optimizer type">Optimizer:</label>
<select id="optimizer">
<option value="adamw">AdamW</option>
<option value="adam">Adam</option>
<option value="sgd">SGD</option>
<option value="adamw_8bit">AdamW 8-bit</option>
</select>
</div>
<div class="form-group">
<label for="dtype" data-tooltip="Data type for training">Precision:</label>
<select id="dtype">
<option value="bf16" selected>BF16</option>
<option value="fp16">FP16</option>
<option value="fp32">FP32</option>
<option value="int8">INT8</option>
<option value="int4">INT4</option>
</select>
</div>
<div class="form-group">
<label for="activation-checkpointing" data-tooltip="Activation checkpointing level (0=none, 4=full)">
Activation Checkpointing:
</label>
<select id="activation-checkpointing">
<option value="0">0: None (most memory)</option>
<option value="1">1: Checkpoint attention output</option>
<option value="2" selected>2: Checkpoint attention input</option>
<option value="3">3: Checkpoint layer + attention</option>
<option value="4">4: Full checkpointing (least memory)</option>
</select>
</div>
</div>
</section>
<!-- Parallelism Settings -->
<section class="config-section">
<h3>Parallelism</h3>
<div class="form-grid">
<div class="form-group" data-tooltip="Tensor parallelism degree">
<label for="tensor-pp">Tensor PP:</label>
<input type="number" id="tensor-pp" value="1" min="1" max="8">
</div>
<div class="form-group" data-tooltip="Pipeline parallelism degree">
<label for="pipeline-pp">Pipeline PP:</label>
<input type="number" id="pipeline-pp" value="1" min="1" max="16">
</div>
<div class="form-group" data-tooltip="Data parallelism degree">
<label for="data-pp">Data PP:</label>
<input type="number" id="data-pp" value="8" min="1">
</div>
<div class="form-group" data-tooltip="Enable sequence parallelism">
<label for="seq-parallel">
<input type="checkbox" id="seq-parallel">
Sequence Parallel
</label>
</div>
</div>
<p class="info-text">Effective GPUs: <span id="effective-gpus">8</span></p>
</section>
<!-- Engine Settings -->
<section class="config-section">
<h3>Training Engine</h3>
<div class="form-group">
<label for="engine-type" data-tooltip="Training framework/engine">Engine Type:</label>
<select id="engine-type">
<option value="pytorch_ddp">PyTorch DDP</option>
<option value="deepspeed" selected>DeepSpeed ZeRO</option>
<option value="megatron_lm">Megatron-LM</option>
<option value="fsdp">PyTorch FSDP</option>
<option value="megatron_deepspeed">Megatron + DeepSpeed</option>
</select>
</div>
<div id="engine-options">
<!-- Dynamic fields based on engine type -->
<!-- DeepSpeed ZeRO options -->
<div class="form-group" id="zero-stage-group">
<label for="zero-stage" data-tooltip="DeepSpeed ZeRO stage (0-3)">ZeRO Stage:</label>
<select id="zero-stage">
<option value="0">0: Disabled</option>
<option value="1">1: Shard optimizer states</option>
<option value="2">2: Shard optimizer + gradients</option>
<option value="3" selected>3: Shard everything</option>
</select>
</div>
<div class="form-group" id="offload-opt-group">
<label for="offload-optimizer" data-tooltip="CPU offload for optimizer states">Offload Optimizer:</label>
<select id="offload-optimizer">
<option value="none">None</option>
<option value="cpu" selected>CPU</option>
<option value="nvme">NVMe</option>
</select>
</div>
<div class="form-group" id="offload-param-group">
<label for="offload-param" data-tooltip="CPU offload for parameters">Offload Parameters:</label>
<select id="offload-param">
<option value="none" selected>None</option>
<option value="cpu">CPU</option>
<option value="nvme">NVMe</option>
</select>
</div>
<!-- ZeRO-Init option -->
<div class="form-group" id="zero-init-group">
<label for="zero-init" data-tooltip="Use ZeRO initialization (reduces memory during init)">
<input type="checkbox" id="zero-init" checked>
ZeRO Init (ZeRO-3)
</label>
</div>
<!-- FSDP Sharding Strategy -->
<div class="form-group" id="sharding-strategy-group" style="display:none;">
<label for="sharding-strategy" data-tooltip="FSDP sharding strategy">Sharding Strategy:</label>
<select id="sharding-strategy">
<option value="no_shard">No Sharding (like DDP)</option>
<option value="shard_grad_op">Shard Gradients + Optimizer (ZeRO-2)</option>
<option value="full_shard" selected>Full Shard (ZeRO-3)</option>
</select>
</div>
<!-- Megatron-specific options -->
<div class="form-group" id="megatron-options" style="display:none;">
  <!-- Megatron-LM option group. Starts hidden via the inline display:none. -->
  <label class="group-label">Megatron-LM Options:</label>
  <div class="form-group" style="margin-top: 10px;">
    <!-- The for attribute must name the checkbox this label wraps; a for
         value that matches no element leaves the label with no control. -->
    <label for="use-distributed-optimizer" data-tooltip="Model parallelism strategy">
      <input type="checkbox" id="use-distributed-optimizer">
      Use Distributed Optimizer
    </label>
  </div>
  <div class="form-group" style="margin-top: 5px;">
    <label for="num-micro-batches" data-tooltip="Number of micro-batches for pipeline parallelism">
      Num Micro-Batches (PP):
      <input type="number" id="num-micro-batches" value="1" min="1" max="128">
    </label>
  </div>
</div>
<!-- Advanced Training Options -->
<div class="form-group" style="margin-top: 15px;">
<label class="group-label">Advanced Training Options:</label>
<div class="form-group" style="margin-top: 10px;">
<label for="gradient-clipping" data-tooltip="Gradient clipping threshold (0 = disabled)">
Gradient Clipping:
<input type="number" id="gradient-clipping" value="1.0" min="0" step="0.1">
</label>
</div>
<div class="form-group" style="margin-top: 5px;">
<label for="weight-decay" data-tooltip="Weight decay for regularization">Weight Decay:</label>
<input type="number" id="weight-decay" value="0.01" min="0" step="0.001">
</div>
<div class="form-group" style="margin-top: 5px;">
<label for="lr" data-tooltip="Learning rate (for reference)">Learning Rate:</label>
<input type="number" id="lr" value="0.0001" min="0" step="0.00001">
</div>
<div class="form-group" style="margin-top: 5px;">
<label for="warmup-steps" data-tooltip="Learning rate warmup steps">Warmup Steps:</label>
<input type="number" id="warmup-steps" value="2000" min="0">
</div>
</div>
</div>
</section>
<!-- Hardware Settings -->
<section class="config-section">
<h3>Hardware</h3>
<div class="form-grid">
<div class="form-group" data-tooltip="Number of GPUs">
<label for="num-gpus">Number of GPUs:</label>
<input type="number" id="num-gpus" value="8" min="1" max="1024">
</div>
<div class="form-group" data-tooltip="GPU model and memory per GPU">
  <label for="gpu-model">GPU Model:</label>
  <!-- Each numeric option value equals the memory size in GB shown in its
       label, so the RTX 4090 entry carries 24, as it does in the
       inference-tab GPU list. -->
  <select id="gpu-model">
    <option value="24">RTX 4090 - 24GB</option>
    <option value="32">V100 - 32GB</option>
    <option value="40">A100 - 40GB</option>
    <option value="80" selected>A100 - 80GB / H100 - 80GB</option>
    <option value="141">H200 - 141GB</option>
    <option value="192">B200 - 192GB</option>
    <option value="custom">Custom</option>
  </select>
  <!-- Hidden by default; presumably revealed when "Custom" is chosen (verify
       against the page script). It has no visible label, so it gets an
       accessible name. -->
  <input type="number" id="gpu-mem-custom" value="80" min="1" style="display:none" aria-label="Custom GPU memory per GPU (GB)">
</div>
</div>
</section>
<!-- Calculate Buttons -->
<div class="button-group">
  <!-- Training-tab action buttons. Explicit type="button" matches the
       HuggingFace fetch buttons and avoids the implicit submit default. -->
  <button id="calculate-btn" class="btn-primary" type="button">Calculate</button>
  <button id="reset-btn" class="btn-secondary" type="button">Reset</button>
</div>
</div>
<!-- Results Panel -->
<div class="results-panel">
<h2>Results</h2>
<div class="result-card">
<h3>Memory Breakdown</h3>
<div class="metric">
<span class="metric-label">Per GPU:</span>
<span class="metric-value" id="result-per-gpu">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">Total All GPUs:</span>
<span class="metric-value" id="result-total">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">CPU Memory:</span>
<span class="metric-value" id="result-cpu">-- GB</span>
</div>
</div>
<div class="result-card">
<h3>Component Breakdown</h3>
<div class="breakdown-item">
<span class="breakdown-label">Model Parameters:</span>
<span class="breakdown-value" id="breakdown-params">-- GB</span>
</div>
<div class="breakdown-item">
<span class="breakdown-label">Gradients:</span>
<span class="breakdown-value" id="breakdown-grads">-- GB</span>
</div>
<div class="breakdown-item">
<span class="breakdown-label">Optimizer States:</span>
<span class="breakdown-value" id="breakdown-optimizer">-- GB</span>
</div>
<div class="breakdown-item">
<span class="breakdown-label">Activations:</span>
<span class="breakdown-value" id="breakdown-activations">-- GB</span>
</div>
<div class="breakdown-item">
<span class="breakdown-label">Overhead:</span>
<span class="breakdown-value" id="breakdown-overhead">-- GB</span>
</div>
<!-- Simple bar chart -->
<div class="bar-chart" id="breakdown-chart">
<div class="bar" id="bar-params" style="width: 0%" title="Model Parameters"></div>
<div class="bar" id="bar-grads" style="width: 0%" title="Gradients"></div>
<div class="bar" id="bar-optimizer" style="width: 0%" title="Optimizer States"></div>
<div class="bar" id="bar-activations" style="width: 0%" title="Activations"></div>
</div>
<div class="chart-legend">
<span class="legend-item"><span class="legend-color params"></span>Params</span>
<span class="legend-item"><span class="legend-color grads"></span>Grads</span>
<span class="legend-item"><span class="legend-color optimizer"></span>Opt</span>
<span class="legend-item"><span class="legend-color activations"></span>Act</span>
</div>
</div>
<div class="result-card">
<h3>Feasibility</h3>
<div class="metric">
<span class="metric-label">Status:</span>
<span class="metric-value" id="feasibility-status">--</span>
</div>
<div class="metric">
<span class="metric-label">Utilization:</span>
<span class="metric-value" id="feasibility-util">--%</span>
</div>
<div class="metric" id="recommended-batch-container" style="display:none">
<span class="metric-label">Recommended Batch:</span>
<span class="metric-value" id="recommended-batch">--</span>
</div>
</div>
<div class="result-card">
<h3>Formula Explanation</h3>
<div id="formula-description" class="formula-description">
<p>Run a calculation to see the formula breakdown.</p>
</div>
<div id="formula-components" style="display:none;">
<!-- Formula components will be inserted here -->
</div>
<div class="formula-references" style="display:none;">
<h4>References:</h4>
<ul id="references-list"></ul>
</div>
<!-- Button for the formula details in this card; explicit type avoids the implicit submit default. -->
<button id="show-formula-btn" class="btn-secondary" type="button" style="margin-top: 10px; width: 100%;">
  Show Formula Details
</button>
</div>
<div class="button-group">
  <!-- Save, copy and export buttons for the training results panel. -->
  <button id="save-config-btn" class="btn-secondary" type="button">Save Config</button>
  <button id="copy-json-btn" class="btn-secondary" type="button">Copy JSON</button>
  <button id="export-framework-btn" class="btn-secondary" type="button">⬇️ Export Framework Config</button>
</div>
</div>
</div><!-- End Training Tab -->
<!-- Inference Tab -->
<div id="inference-tab" class="tab-content" style="display:none;">
<div class="config-panel">
<h2>Inference Configuration</h2>
<!-- Model Settings -->
<section class="config-section">
<h3>Model Settings</h3>
<div class="form-group">
  <!-- Inference-tab preset picker; same model list as the training tab. -->
  <label for="inference-preset-select">Preset Model:</label>
  <select id="inference-preset-select">
    <option value="custom">Custom</option>
    <optgroup label="Dense Models">
      <option value="llama2-7b">LLaMA 2 7B</option>
      <option value="llama2-13b">LLaMA 2 13B</option>
      <option value="llama2-70b">LLaMA 2 70B</option>
      <option value="gpt3-175b">GPT-3 175B</option>
    </optgroup>
    <optgroup label="MoE (Mixture of Experts) Models">
      <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
      <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
      <option value="glm-4-9b">GLM-4 9B (MoE)</option>
      <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
      <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
      <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
    </optgroup>
  </select>
</div>
<div class="form-grid">
<div class="form-group">
<label for="inference-model-name">Model Name:</label>
<input type="text" id="inference-model-name" value="custom-model">
</div>
<div class="form-group">
<label for="inference-num-params">Parameters:</label>
<input type="text" id="inference-num-params" value="7B" placeholder="e.g., 7B">
</div>
<div class="form-group">
<label for="inference-num-layers">Layers:</label>
<input type="number" id="inference-num-layers" value="32" min="1">
</div>
<div class="form-group">
<label for="inference-hidden-size">Hidden Size:</label>
<input type="number" id="inference-hidden-size" value="4096" min="1">
</div>
<div class="form-group">
<label for="inference-num-heads">Attention Heads:</label>
<input type="number" id="inference-num-heads" value="32" min="1">
</div>
<div class="form-group">
<label for="inference-vocab-size">Vocab Size:</label>
<input type="number" id="inference-vocab-size" value="32000" min="1">
</div>
<div class="form-group">
<label for="inference-seq-len">Max Seq Length:</label>
<input type="number" id="inference-seq-len" value="4096" min="1">
</div>
</div>
</section>
<!-- Inference Settings -->
<section class="config-section">
<h3>Inference Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="inference-engine" data-tooltip="Inference engine to use">Inference Engine:</label>
<select id="inference-engine">
<option value="huggingface">HuggingFace Transformers</option>
<option value="vllm" selected>vLLM (Recommended)</option>
<option value="tgi">TGI (HuggingFace TGI)</option>
<option value="tensorrt_llm">TensorRT-LLM</option>
<option value="sglang">SGLang</option>
</select>
</div>
<div class="form-group">
<label for="inference-batch-size">Batch Size:</label>
<input type="number" id="inference-batch-size" value="32" min="1">
</div>
<div class="form-group">
  <label for="kv-cache-quantization" data-tooltip="KV cache quantization type">KV Cache Quantization:</label>
  <!-- Ratios are stated against the 16-bit FP16 baseline named in the first
       option: 8-bit formats (INT8, FP8) halve it, 4-bit INT4 quarters it.
       NOTE(review): only the visible labels are corrected here; confirm the
       calculator applies the same factors. -->
  <select id="kv-cache-quantization">
    <option value="none" selected>NONE (FP16)</option>
    <option value="int8">INT8 (2x compression)</option>
    <option value="fp8">FP8 (2x compression)</option>
    <option value="int4">INT4 (4x compression)</option>
  </select>
</div>
<div class="form-group">
<label for="tensor-parallel-size" data-tooltip="Number of GPUs for tensor parallelism">Tensor Parallel Size:</label>
<input type="number" id="tensor-parallel-size" value="1" min="1" max="8">
</div>
<div class="form-group">
<label for="gpu-memory-util" data-tooltip="GPU memory utilization (0.0-1.0)">GPU Memory Utilization:</label>
<input type="range" id="gpu-memory-util" min="0.5" max="0.95" step="0.05" value="0.9">
<span id="gpu-memory-util-value">0.90</span>
</div>
<div class="form-group">
<label for="inference-gpu-model">GPU Model:</label>
<select id="inference-gpu-model">
<option value="24">RTX 4090 - 24GB</option>
<option value="32">V100 - 32GB</option>
<option value="40">A100 - 40GB</option>
<option value="80" selected>A100 - 80GB / H100 - 80GB</option>
<option value="141">H200 - 141GB</option>
</select>
</div>
<div class="form-group">
<label for="inference-num-gpus">Number of GPUs:</label>
<input type="number" id="inference-num-gpus" value="1" min="1">
</div>
<div class="form-group">
<label for="use-kv-cache">Enable KV Cache:</label>
<input type="checkbox" id="use-kv-cache" checked>
</div>
</div>
</section>
<!-- TGI-specific Settings -->
<section class="config-section" id="tgi-settings" style="display:none;">
<h3>TGI-Specific Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="max-total-tokens" data-tooltip="Most important: defines memory budget (input + output)">Max Total Tokens:</label>
<input type="number" id="max-total-tokens" value="4096" min="1" placeholder="e.g., 4096">
</div>
<div class="form-group">
<label for="max-input-tokens">Max Input Tokens:</label>
<input type="number" id="max-input-tokens" value="2048" min="1" placeholder="e.g., 2048">
</div>
<div class="form-group">
<label for="max-batch-total-tokens">Max Batch Total Tokens:</label>
<input type="number" id="max-batch-total-tokens" value="8192" min="1" placeholder="e.g., 8192">
</div>
<div class="form-group">
<label for="tgi-quantize">Weight Quantization:</label>
<select id="tgi-quantize">
<option value="none" selected>NONE</option>
<option value="awq">AWQ</option>
<option value="eetq">EETQ</option>
<option value="exl2">EXL2</option>
<option value="gptq">GPTQ</option>
<option value="marlin">Marlin</option>
<option value="bitsandbytes">BitsAndBytes (8-bit)</option>
<option value="bitsandbytes-nf4">BitsAndBytes NF4</option>
<option value="bitsandbytes-fp4">BitsAndBytes FP4</option>
<option value="fp8">FP8</option>
</select>
</div>
<div class="form-group">
<label for="tgi-dtype">Data Type:</label>
<select id="tgi-dtype">
<option value="float16">Float16</option>
<option value="bfloat16" selected>BFloat16</option>
</select>
</div>
<div class="form-group">
<label for="sharded">Enable Sharded:</label>
<input type="checkbox" id="sharded">
</div>
<div class="form-group">
<label for="num-shard">Number of Shards:</label>
<input type="number" id="num-shard" value="1" min="1" placeholder="Auto if empty">
</div>
</div>
</section>
<!-- vLLM-specific Settings -->
<section class="config-section" id="vllm-settings" style="display:none;">
<h3>vLLM-Specific Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="block-size" data-tooltip="Block size for paged KV cache (default: 16)">Block Size:</label>
<select id="block-size">
<option value="">Auto (16)</option>
<option value="1">1</option>
<option value="8">8</option>
<option value="16" selected>16</option>
<option value="32">32</option>
<option value="64">64</option>
<option value="128">128</option>
</select>
</div>
<div class="form-group">
<label for="swap-space-gb">CPU Swap Space (GB):</label>
<input type="number" id="swap-space-gb" value="0" min="0" step="0.1">
</div>
<div class="form-group">
<label for="enable-prefix-caching">Enable Prefix Caching:</label>
<input type="checkbox" id="enable-prefix-caching">
</div>
<div class="form-group">
<label for="enforce-eager">Enable Eager Mode:</label>
<input type="checkbox" id="enforce-eager">
</div>
<div class="form-group">
<label for="max-num-batched-tokens">Max Batched Tokens:</label>
<input type="number" id="max-num-batched-tokens" placeholder="Auto if empty">
</div>
<div class="form-group">
<label for="max-num-seqs">Max Sequences per Batch:</label>
<input type="number" id="max-num-seqs" placeholder="Auto if empty">
</div>
<div class="form-group">
<label for="vllm-quantization">Weight Quantization:</label>
<select id="vllm-quantization">
<option value="none" selected>NONE</option>
<option value="awq">AWQ</option>
<option value="gptq">GPTQ</option>
<option value="squeezellm">SqueezeLLM</option>
<option value="fp8">FP8</option>
</select>
</div>
</div>
</section>
<!-- TensorRT-LLM-specific Settings -->
<section class="config-section" id="tensorrt-settings" style="display:none;">
<h3>TensorRT-LLM-Specific Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="trt-max-batch-size">Max Batch Size:</label>
<input type="number" id="trt-max-batch-size" value="2048" min="1">
</div>
<div class="form-group">
<label for="trt-max-input-len">Max Input Length:</label>
<input type="number" id="trt-max-input-len" value="1024" min="1">
</div>
<div class="form-group">
<label for="trt-max-seq-len">Max Sequence Length:</label>
<input type="number" id="trt-max-seq-len" value="2048" min="1">
</div>
<div class="form-group">
<label for="trt-max-beam-width">Max Beam Width:</label>
<input type="number" id="trt-max-beam-width" value="1" min="1">
</div>
</div>
</section>
<!-- SGLang-specific Settings -->
<section class="config-section" id="sglang-settings" style="display:none;">
<h3>SGLang-Specific Settings</h3>
<div class="form-grid">
<div class="form-group">
<label for="chunk-size" data-tooltip="Prefill chunk size for long contexts (default: 8192)">Chunk Size:</label>
<input type="number" id="chunk-size" value="8192" min="1" placeholder="e.g., 8192">
</div>
<div class="form-group">
<label for="max-running-requests" data-tooltip="Maximum number of concurrent requests">Max Running Requests:</label>
<input type="number" id="max-running-requests" value="128" min="1" placeholder="e.g., 128">
</div>
<div class="form-group">
<label for="radix-cache-max-seq-len" data-tooltip="Maximum sequence length for RadixCache">RadixCache Max Seq Len:</label>
<input type="number" id="radix-cache-max-seq-len" value="8192" min="1" placeholder="e.g., 8192">
</div>
<div class="form-group">
<label for="attention-backend" data-tooltip="Attention backend implementation">Attention Backend:</label>
<select id="attention-backend">
<option value="flashinfer" selected>FlashInfer</option>
<option value="triton">Triton</option>
<option value="torch">Torch</option>
</select>
</div>
<div class="form-group">
<label for="speculative-algo" data-tooltip="Speculative decoding algorithm">Speculative Algorithm:</label>
<select id="speculative-algo">
<option value="default" selected>Default</option>
<option value="medusa">Medusa</option>
<option value="eagle">EAGLE</option>
</select>
</div>
<div class="form-group">
<label for="disable-radix-cache">Disable RadixCache:</label>
<input type="checkbox" id="disable-radix-cache">
</div>
<div class="form-group">
<label for="enable-p2p">Enable P2P Attention:</label>
<input type="checkbox" id="enable-p2p">
</div>
<div class="form-group">
<label for="disable-custom-all-reduce">Disable Custom All-Reduce:</label>
<input type="checkbox" id="disable-custom-all-reduce">
</div>
<div class="form-group">
<label for="enable-torch-compile">Enable torch.compile:</label>
<input type="checkbox" id="enable-torch-compile">
</div>
<div class="form-group">
<label for="multi-lora-enabled">Enable Multi-LoRA:</label>
<input type="checkbox" id="multi-lora-enabled">
</div>
</div>
</section>
<!-- Calculate Button -->
<div class="button-group">
  <!-- Inference-tab action buttons. Explicit type="button" matches the
       HuggingFace fetch buttons and avoids the implicit submit default. -->
  <button id="inference-calculate-btn" class="btn-primary" type="button">Calculate Inference Memory</button>
  <button id="inference-reset-btn" class="btn-secondary" type="button">Reset</button>
</div>
</div>
<!-- Inference Results Panel -->
<div class="results-panel">
<h2>Inference Results</h2>
<div class="result-card">
<h3>Memory Breakdown</h3>
<div class="metric">
<span class="metric-label">Per GPU:</span>
<span class="metric-value" id="inference-result-per-gpu">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">Total All GPUs:</span>
<span class="metric-value" id="inference-result-total">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">Model Parameters:</span>
<span class="metric-value" id="inference-result-params">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">KV Cache:</span>
<span class="metric-value" id="inference-result-kv-cache">-- GB</span>
</div>
<div class="metric">
<span class="metric-label">Activations:</span>
<span class="metric-value" id="inference-result-activations">-- GB</span>
</div>
</div>
<div class="result-card">
<h3>Performance Estimates</h3>
<div class="metric">
<span class="metric-label">Max Batch Size:</span>
<span class="metric-value" id="inference-max-batch">--</span>
</div>
<div class="metric">
<span class="metric-label">Estimated Throughput:</span>
<span class="metric-value" id="inference-throughput">-- tokens/sec</span>
</div>
<div class="metric">
<span class="metric-label">Fits on GPU:</span>
<span class="metric-value" id="inference-fits">--</span>
</div>
<div class="metric">
<span class="metric-label">Utilization:</span>
<span class="metric-value" id="inference-utilization">--%</span>
</div>
</div>
</div>
</div><!-- End Inference Tab -->
<!-- Multi-Node Tab -->
<div id="multinode-tab" class="tab-content" style="display:none;">
<div class="config-panel">
<h2>Multi-Node Training Configuration</h2>
<p class="info-text">Calculate network communication overhead for distributed training across multiple nodes.</p>
<!-- Model Settings -->
<section class="config-section">
<h3>Model Settings</h3>
<div class="form-group">
  <!-- Multi-node-tab preset picker; same model list as the other tabs. -->
  <label for="multinode-preset-select">Preset Model:</label>
  <select id="multinode-preset-select">
    <option value="custom">Custom</option>
    <optgroup label="Dense Models">
      <option value="llama2-7b">LLaMA 2 7B</option>
      <option value="llama2-13b">LLaMA 2 13B</option>
      <option value="llama2-70b">LLaMA 2 70B</option>
      <option value="gpt3-175b">GPT-3 175B</option>
    </optgroup>
    <optgroup label="MoE (Mixture of Experts) Models">
      <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
      <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
      <option value="glm-4-9b">GLM-4 9B (MoE)</option>
      <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
      <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
      <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
    </optgroup>
  </select>
</div>
<div class="form-grid">
<div class="form-group">
<label for="multinode-num-params">Parameters:</label>
<input type="text" id="multinode-num-params" value="7B" placeholder="e.g., 7B">
</div>
<div class="form-group">
<label for="multinode-dtype">Precision:</label>
<select id="multinode-dtype">
<option value="bf16" selected>BF16</option>
<option value="fp16">FP16</option>
<option value="fp32">FP32</option>
</select>
</div>
</div>
</section>
<!-- Node Configuration: cluster shape (node count and GPUs per node) and the link type between nodes -->
<section class="config-section">
  <h3>Node Configuration</h3>
  <div class="form-grid">
    <div class="form-group">
      <label for="num-nodes">Number of Nodes:</label>
      <input type="number" id="num-nodes" value="2" min="1">
    </div>
    <div class="form-group">
      <label for="gpus-per-node">GPUs per Node:</label>
      <input type="number" id="gpus-per-node" value="8" min="1">
    </div>
    <div class="form-group">
      <label for="interconnect-type" data-tooltip="Network interconnect between nodes">Interconnect Type:</label>
      <!-- NOTE(review): NVLink bandwidth is normally quoted in GB/s, not Gbps; confirm the "900 Gbps" label
           against the bandwidth the backend assigns to the "nvlink" value before changing either side. -->
      <select id="interconnect-type">
        <option value="infiniband" selected>InfiniBand (200 Gbps)</option>
        <option value="nvlink">NVLink (900 Gbps)</option>
        <option value="ethernet_200g">Ethernet 200G</option>
        <option value="ethernet_100g">Ethernet 100G</option>
        <option value="ethernet_25g">Ethernet 25G</option>
        <option value="ethernet_10g">Ethernet 10G</option>
      </select>
    </div>
  </div>
  <!-- The initial 16 matches the defaults above (2 nodes, 8 GPUs each); presumably the script rewrites this span when either input changes (TODO confirm) -->
  <p class="info-text">Total GPUs: <span id="multinode-total-gpus">16</span></p>
</section>
<!-- Training Configuration: engine choice, ZeRO stage, per-GPU batch size and sequence length -->
<section class="config-section">
  <h3>Training Configuration</h3>
  <div class="form-grid">
    <div class="form-group">
      <label for="multinode-engine">Training Engine:</label>
      <select id="multinode-engine">
        <option value="pytorch_ddp">PyTorch DDP</option>
        <option value="deepspeed" selected>DeepSpeed ZeRO</option>
        <option value="megatron_lm">Megatron-LM</option>
        <option value="fsdp">PyTorch FSDP</option>
      </select>
    </div>
    <!-- This group carries its own id; presumably so the script can show or hide it depending on the selected engine (TODO confirm) -->
    <div class="form-group" id="multinode-zero-stage-group">
      <label for="multinode-zero-stage">ZeRO Stage:</label>
      <select id="multinode-zero-stage">
        <option value="1">1: Shard optimizer states</option>
        <option value="2">2: Shard optimizer + gradients</option>
        <option value="3" selected>3: Shard everything</option>
      </select>
    </div>
    <div class="form-group">
      <label for="multinode-batch-size">Batch Size per GPU:</label>
      <input type="number" id="multinode-batch-size" value="4" min="1">
    </div>
    <div class="form-group">
      <label for="multinode-seq-len">Sequence Length:</label>
      <input type="number" id="multinode-seq-len" value="4096" min="1">
    </div>
  </div>
</section>
<!-- Parallelism Strategy: tensor and pipeline parallel degrees plus two checkbox toggles -->
<section class="config-section">
  <h3>Parallelism Strategy</h3>
  <div class="form-grid">
    <div class="form-group">
      <label for="multinode-tensor-pp">Tensor Parallel:</label>
      <input type="number" id="multinode-tensor-pp" value="1" min="1">
    </div>
    <div class="form-group">
      <label for="multinode-pipeline-pp">Pipeline Parallel:</label>
      <input type="number" id="multinode-pipeline-pp" value="1" min="1">
    </div>
    <!-- Each checkbox sits inside its own label, so clicking the label text toggles it -->
    <div class="form-group">
      <label for="multinode-seq-parallel">
        <input type="checkbox" id="multinode-seq-parallel">
        Enable Sequence Parallel
      </label>
    </div>
    <div class="form-group">
      <!-- Checked by default -->
      <label for="multinode-optimize">
        <input type="checkbox" id="multinode-optimize" checked>
        Auto-optimize Strategy
      </label>
    </div>
  </div>
</section>
<!-- Calculate and Reset actions for the multi-node form.
     type="button" is explicit because a button without a type defaults to "submit";
     this keeps the buttons inert if the panel is ever wrapped in a form. -->
<div class="button-group">
  <button id="multinode-calculate-btn" class="btn-primary" type="button">Calculate Network Overhead</button>
  <button id="multinode-reset-btn" class="btn-secondary" type="button">Reset</button>
</div>
</div>
<!-- Multi-Node Results Panel -->
<div class="results-panel">
<h2>Multi-Node Results</h2>
<!-- Network Overhead card: total plus a per-collective breakdown.
     Every value starts as a "-- GB" placeholder; presumably the script fills them in by id after a calculation (TODO confirm). -->
<div class="result-card">
  <h3>Network Overhead</h3>
  <div class="metric">
    <span class="metric-label">Total Overhead:</span>
    <span class="metric-value" id="multinode-overhead-total">-- GB</span>
  </div>
  <div class="metric">
    <span class="metric-label">AllReduce:</span>
    <span class="metric-value" id="multinode-overhead-allreduce">-- GB</span>
  </div>
  <div class="metric">
    <span class="metric-label">AllGather:</span>
    <span class="metric-value" id="multinode-overhead-allgather">-- GB</span>
  </div>
  <div class="metric">
    <span class="metric-label">ReduceScatter:</span>
    <span class="metric-value" id="multinode-overhead-reducescatter">-- GB</span>
  </div>
  <div class="metric">
    <span class="metric-label">Pipeline Comm:</span>
    <span class="metric-value" id="multinode-overhead-pipeline">-- GB</span>
  </div>
</div>
<!-- Time Impact card: three timing metrics, each shown as a "--" placeholder with its unit until results arrive -->
<div class="result-card">
  <h3>Time Impact</h3>
  <div class="metric">
    <span class="metric-label">Est. Overhead:</span>
    <span class="metric-value" id="multinode-time-overhead">-- ms/step</span>
  </div>
  <div class="metric">
    <span class="metric-label">Communication Time:</span>
    <span class="metric-value" id="multinode-comm-time">-- ms/step</span>
  </div>
  <div class="metric">
    <span class="metric-label">Latency Impact:</span>
    <span class="metric-value" id="multinode-latency">-- ms</span>
  </div>
</div>
<!-- Optimization Suggestions card: shows a prompt until a calculation has been run;
     presumably the script replaces the contents of #multinode-suggestions afterwards (TODO confirm). -->
<div class="result-card">
  <h3>Optimization Suggestions</h3>
  <div id="multinode-suggestions">
    <p>Run calculation to see optimization suggestions.</p>
  </div>
</div>
</div>
</div><!-- End Multi-Node Tab -->
</div>
<!-- Page-level error banner, hidden until needed. role="alert" makes it a live region, so assistive
     technology announces the message when it is shown; presumably the script sets the text and toggles display (TODO confirm). -->
<div id="error-message" class="error-message" style="display:none" role="alert"></div>
</div>
<!-- Application script; placed after all of the markup so the elements above are already parsed when it runs -->
<script src="/static/js/app.js"></script>
</body>
</html>