Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>GPU Memory Calculator for LLM Training</title> | |
| <link rel="stylesheet" href="/static/css/styles.css"> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <header> | |
| <!-- NOTE(review): the heading originally began with an emoji that was mis-encoded beyond recovery; restore the intended icon if it is known. --> | |
| <h1>GPU Memory Calculator</h1> | |
| <p class="subtitle">For LLM Training, Inference, and Multi-Node Optimization</p> | |
| </header> | |
| <!-- Tab navigation: each button's data-tab names a panel ("training" pairs with #training-tab, "inference" with #inference-tab); switching is presumably done by the page script. --> | |
| <!-- NOTE(review): every tab label originally started with an emoji that was mis-encoded beyond recovery; restore the icons if they are known. --> | |
| <nav class="tab-navigation" aria-label="Calculator modes"> | |
| <button class="tab-btn active" data-tab="training" type="button">Training</button> | |
| <button class="tab-btn" data-tab="inference" type="button">Inference</button> | |
| <button class="tab-btn" data-tab="multinode" type="button">Multi-Node</button> | |
| </nav> | |
| <div class="main-content"> | |
| <!-- Training Tab --> | |
| <div id="training-tab" class="tab-content active"> | |
| <!-- Configuration Panel --> | |
| <div class="config-panel"> | |
| <h2>Training Configuration</h2> | |
| <!-- Model Settings --> | |
| <section class="config-section"> | |
| <h3>Model Settings</h3> | |
| <!-- Preset model dropdown and the "Fetch from HF" button share one row (.preset-row). --> | |
| <div class="form-group"> | |
| <label for="preset-select">Preset Model:</label> | |
| <div class="preset-row"> | |
| <select id="preset-select"> | |
| <option value="custom">Custom</option> | |
| <optgroup label="Dense Models"> | |
| <option value="llama2-7b">LLaMA 2 7B</option> | |
| <option value="llama2-13b">LLaMA 2 13B</option> | |
| <option value="llama2-70b">LLaMA 2 70B</option> | |
| <option value="gpt3-175b">GPT-3 175B</option> | |
| </optgroup> | |
| <optgroup label="MoE (Mixture of Experts) Models"> | |
| <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option> | |
| <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option> | |
| <option value="glm-4-9b">GLM-4 9B (MoE)</option> | |
| <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option> | |
| <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option> | |
| <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option> | |
| </optgroup> | |
| </select> | |
| <button id="fetch-hf-btn" class="btn-tertiary" title="Fetch from HuggingFace Hub" type="button"> | |
| <span>🤗 Fetch from HF</span> | |
| </button> | |
| </div> | |
| </div> | |
| <!-- HF Fetch Panel (hidden by default) --> | |
| <div id="hf-fetch-panel" style="display: none;" class="hf-fetch-panel"> | |
| <div class="form-group"> | |
| <label for="hf-model-id">HuggingFace Model ID:</label> | |
| <input type="text" id="hf-model-id" placeholder="e.g., meta-llama/Llama-2-7b-hf" aria-describedby="hf-model-help"> | |
| <span id="hf-model-help" class="help-text">Enter the HuggingFace model repository ID (e.g., meta-llama/Llama-2-7b-hf)</span> | |
| </div> | |
| <div class="form-group"> | |
| <label for="hf-token">HF Token (optional, for private models):</label> | |
| <input type="password" id="hf-token" placeholder="hf_xxxxxxxxxxxx" aria-describedby="hf-token-help"> | |
| <span id="hf-token-help" class="help-text">Leave empty for public models, provide token for gated/private models</span> | |
| </div> | |
| <div class="button-group"> | |
| <button id="hf-fetch-submit" class="btn-primary" type="button">Fetch Model</button> | |
| <button id="hf-fetch-cancel" class="btn-secondary" type="button">Cancel</button> | |
| </div> | |
| <div id="hf-loading" style="display: none;" class="loading-message"> | |
| <p>Fetching model from HuggingFace Hub...</p> | |
| </div> | |
| <!-- Status regions: aria-live must be one of "off", "polite" or "assertive"; any other token is ignored by assistive technology. --> | |
| <div id="hf-error" style="display: none;" class="error-message" aria-live="polite"></div> | |
| <div id="hf-success" style="display: none;" class="success-message" aria-live="polite"></div> | |
| </div> | |
| <div class="form-grid"> | |
| <div class="form-group" data-tooltip="Name of your model"> | |
| <label for="model-name" id="label-model-name">Model Name:</label> | |
| <input type="text" id="model-name" value="custom-model" aria-labelledby="label-model-name"> | |
| </div> | |
| <div class="form-group" data-tooltip="Total number of parameters (e.g., 7B, 7000M, 7000000000)"> | |
| <label for="num-params" id="label-num-params">Parameters:</label> | |
| <input type="text" id="num-params" value="7B" placeholder="e.g., 7B" aria-labelledby="label-num-params" aria-describedby="help-num-params"> | |
| <span id="help-num-params" class="sr-only">Enter model size as number with optional suffix: 7B, 7000M, or 7000000000</span> | |
| </div> | |
| <div class="form-group" data-tooltip="Number of transformer layers"> | |
| <label for="num-layers" id="label-num-layers">Layers:</label> | |
| <input type="number" id="num-layers" value="32" min="1" aria-labelledby="label-num-layers"> | |
| </div> | |
| <div class="form-group" data-tooltip="Hidden dimension size"> | |
| <label for="hidden-size" id="label-hidden-size">Hidden Size:</label> | |
| <input type="number" id="hidden-size" value="4096" min="1" aria-labelledby="label-hidden-size"> | |
| </div> | |
| <div class="form-group" data-tooltip="Number of attention heads"> | |
| <label for="num-heads" id="label-num-heads">Attention Heads:</label> | |
| <input type="number" id="num-heads" value="32" min="1" aria-labelledby="label-num-heads"> | |
| </div> | |
| <div class="form-group" data-tooltip="Vocabulary size"> | |
| <label for="vocab-size" id="label-vocab-size">Vocab Size:</label> | |
| <input type="number" id="vocab-size" value="32000" min="1" aria-labelledby="label-vocab-size"> | |
| </div> | |
| <div class="form-group" data-tooltip="Maximum sequence length"> | |
| <label for="seq-len" id="label-seq-len">Max Seq Length:</label> | |
| <input type="number" id="seq-len" value="4096" min="1" aria-labelledby="label-seq-len"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- MoE (Mixture of Experts) settings. The #moe-enabled checkbox sits above #moe-fields, which starts hidden (display:none); presumably the page script reveals it when the box is ticked - confirm in the JS. --> | |
| <section class="config-section"> | |
| <h3>Mixture of Experts (MoE)</h3> | |
| <div class="form-group" data-tooltip="Enable Mixture of Experts architecture"> | |
| <label for="moe-enabled"> | |
| <input type="checkbox" id="moe-enabled"> | |
| Enable MoE | |
| </label> | |
| </div> | |
| <div id="moe-fields" style="display:none;"> | |
| <div class="form-grid"> | |
| <div class="form-group" data-tooltip="Total number of experts in the model"> | |
| <label for="num-experts">Number of Experts:</label> | |
| <input type="number" id="num-experts" value="8" min="1" max="256"> | |
| </div> | |
| <div class="form-group" data-tooltip="Number of experts activated per token (top-k routing)"> | |
| <label for="top-k">Top-K (active experts):</label> | |
| <input type="number" id="top-k" value="2" min="1" max="8"> | |
| </div> | |
| <!-- The two size fields below default to an empty value; their placeholders ("Auto (4x hidden)", "None") describe what an empty field is meant to signify. --> | |
| <div class="form-group" data-tooltip="Expert intermediate layer size (default: 4x hidden_size)"> | |
| <label for="expert-intermediate-size">Expert Intermediate Size:</label> | |
| <input type="number" id="expert-intermediate-size" value="" placeholder="Auto (4x hidden)" min="1"> | |
| </div> | |
| <div class="form-group" data-tooltip="Shared expert intermediate size (for models like GLM)"> | |
| <label for="shared-expert-size">Shared Expert Size:</label> | |
| <input type="number" id="shared-expert-size" value="" placeholder="None" min="1"> | |
| </div> | |
| </div> | |
| <!-- The summary's initial "2" and "8" match the default values of #top-k and #num-experts; the spans are presumably rewritten by script when those inputs change. --> | |
| <p class="info-text">With MoE, only <strong><span id="active-experts-display">2</span></strong> of <strong><span id="total-experts-display">8</span></strong> experts are active per token, reducing activation memory.</p> | |
| </div> | |
| </section> | |
| <!-- Training Settings --> | |
| <section class="config-section"> | |
| <h3>Training Settings</h3> | |
| <div class="form-grid"> | |
| <!-- Number box and slider both represent the batch size; the visible label targets the number box only, so the slider carries its own accessible name. --> | |
| <div class="form-group"> | |
| <label for="batch-size" data-tooltip="Batch size per GPU">Batch Size:</label> | |
| <input type="number" id="batch-size" value="4" min="1"> | |
| <input type="range" id="batch-size-slider" min="1" max="128" value="4" aria-label="Batch size slider"> | |
| </div> | |
| <div class="form-group" data-tooltip="Gradient accumulation steps"> | |
| <label for="grad-accum">Gradient Accumulation:</label> | |
| <input type="number" id="grad-accum" value="4" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="optimizer" data-tooltip="Optimizer type">Optimizer:</label> | |
| <select id="optimizer"> | |
| <option value="adamw">AdamW</option> | |
| <option value="adam">Adam</option> | |
| <option value="sgd">SGD</option> | |
| <option value="adamw_8bit">AdamW 8-bit</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="dtype" data-tooltip="Data type for training">Precision:</label> | |
| <select id="dtype"> | |
| <option value="bf16" selected>BF16</option> | |
| <option value="fp16">FP16</option> | |
| <option value="fp32">FP32</option> | |
| <option value="int8">INT8</option> | |
| <option value="int4">INT4</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="activation-checkpointing" data-tooltip="Activation checkpointing level (0=none, 4=full)"> | |
| Activation Checkpointing: | |
| </label> | |
| <select id="activation-checkpointing"> | |
| <option value="0">0: None (most memory)</option> | |
| <option value="1">1: Checkpoint attention output</option> | |
| <option value="2" selected>2: Checkpoint attention input</option> | |
| <option value="3">3: Checkpoint layer + attention</option> | |
| <option value="4">4: Full checkpointing (least memory)</option> | |
| </select> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Parallelism settings: tensor, pipeline and data parallel degrees plus a sequence-parallel toggle. --> | |
| <section class="config-section"> | |
| <h3>Parallelism</h3> | |
| <div class="form-grid"> | |
| <div class="form-group" data-tooltip="Tensor parallelism degree"> | |
| <label for="tensor-pp">Tensor PP:</label> | |
| <input type="number" id="tensor-pp" value="1" min="1" max="8"> | |
| </div> | |
| <div class="form-group" data-tooltip="Pipeline parallelism degree"> | |
| <label for="pipeline-pp">Pipeline PP:</label> | |
| <input type="number" id="pipeline-pp" value="1" min="1" max="16"> | |
| </div> | |
| <div class="form-group" data-tooltip="Data parallelism degree"> | |
| <label for="data-pp">Data PP:</label> | |
| <input type="number" id="data-pp" value="8" min="1"> | |
| </div> | |
| <div class="form-group" data-tooltip="Enable sequence parallelism"> | |
| <label for="seq-parallel"> | |
| <input type="checkbox" id="seq-parallel"> | |
| Sequence Parallel | |
| </label> | |
| </div> | |
| </div> | |
| <!-- Initial "8" equals the product of the default degrees above (1 x 1 x 8); presumably recomputed by the page script - confirm in the JS. --> | |
| <p class="info-text">Effective GPUs: <span id="effective-gpus">8</span></p> | |
| </section> | |
| <!-- Engine Settings --> | |
| <section class="config-section"> | |
| <h3>Training Engine</h3> | |
| <div class="form-group"> | |
| <label for="engine-type" data-tooltip="Training framework/engine">Engine Type:</label> | |
| <select id="engine-type"> | |
| <option value="pytorch_ddp">PyTorch DDP</option> | |
| <option value="deepspeed" selected>DeepSpeed ZeRO</option> | |
| <option value="megatron_lm">Megatron-LM</option> | |
| <option value="fsdp">PyTorch FSDP</option> | |
| <option value="megatron_deepspeed">Megatron + DeepSpeed</option> | |
| </select> | |
| </div> | |
| <div id="engine-options"> | |
| <!-- Dynamic fields based on engine type --> | |
| <!-- DeepSpeed ZeRO options --> | |
| <div class="form-group" id="zero-stage-group"> | |
| <label for="zero-stage" data-tooltip="DeepSpeed ZeRO stage (0-3)">ZeRO Stage:</label> | |
| <select id="zero-stage"> | |
| <option value="0">0: Disabled</option> | |
| <option value="1">1: Shard optimizer states</option> | |
| <option value="2">2: Shard optimizer + gradients</option> | |
| <option value="3" selected>3: Shard everything</option> | |
| </select> | |
| </div> | |
| <div class="form-group" id="offload-opt-group"> | |
| <label for="offload-optimizer" data-tooltip="CPU offload for optimizer states">Offload Optimizer:</label> | |
| <select id="offload-optimizer"> | |
| <option value="none">None</option> | |
| <option value="cpu" selected>CPU</option> | |
| <option value="nvme">NVMe</option> | |
| </select> | |
| </div> | |
| <div class="form-group" id="offload-param-group"> | |
| <label for="offload-param" data-tooltip="CPU offload for parameters">Offload Parameters:</label> | |
| <select id="offload-param"> | |
| <option value="none" selected>None</option> | |
| <option value="cpu">CPU</option> | |
| <option value="nvme">NVMe</option> | |
| </select> | |
| </div> | |
| <!-- ZeRO-Init option --> | |
| <div class="form-group" id="zero-init-group"> | |
| <label for="zero-init" data-tooltip="Use ZeRO initialization (reduces memory during init)"> | |
| <input type="checkbox" id="zero-init" checked> | |
| ZeRO Init (ZeRO-3) | |
| </label> | |
| </div> | |
| <!-- FSDP Sharding Strategy --> | |
| <div class="form-group" id="sharding-strategy-group" style="display:none;"> | |
| <label for="sharding-strategy" data-tooltip="FSDP sharding strategy">Sharding Strategy:</label> | |
| <select id="sharding-strategy"> | |
| <option value="no_shard">No Sharding (like DDP)</option> | |
| <option value="shard_grad_op">Shard Gradients + Optimizer (ZeRO-2)</option> | |
| <option value="full_shard" selected>Full Shard (ZeRO-3)</option> | |
| </select> | |
| </div> | |
| <!-- Megatron-specific options --> | |
| <div class="form-group" id="megatron-options" style="display:none;"> | |
| <label class="group-label">Megatron-LM Options:</label> | |
| <div class="form-group" style="margin-top: 10px;"> | |
| <!-- The for attribute must name the id of the control this label wraps. --> | |
| <label for="use-distributed-optimizer" data-tooltip="Model parallelism strategy"> | |
| <input type="checkbox" id="use-distributed-optimizer"> | |
| Use Distributed Optimizer | |
| </label> | |
| </div> | |
| <div class="form-group" style="margin-top: 5px;"> | |
| <label for="num-micro-batches" data-tooltip="Number of micro-batches for pipeline parallelism"> | |
| Num Micro-Batches (PP): | |
| <input type="number" id="num-micro-batches" value="1" min="1" max="128"> | |
| </label> | |
| </div> | |
| </div> | |
| <!-- Advanced Training Options --> | |
| <div class="form-group" style="margin-top: 15px;"> | |
| <label class="group-label">Advanced Training Options:</label> | |
| <div class="form-group" style="margin-top: 10px;"> | |
| <label for="gradient-clipping" data-tooltip="Gradient clipping threshold (0 = disabled)"> | |
| Gradient Clipping: | |
| <input type="number" id="gradient-clipping" value="1.0" min="0" step="0.1"> | |
| </label> | |
| </div> | |
| <div class="form-group" style="margin-top: 5px;"> | |
| <label for="weight-decay" data-tooltip="Weight decay for regularization">Weight Decay:</label> | |
| <input type="number" id="weight-decay" value="0.01" min="0" step="0.001"> | |
| </div> | |
| <div class="form-group" style="margin-top: 5px;"> | |
| <label for="lr" data-tooltip="Learning rate (for reference)">Learning Rate:</label> | |
| <input type="number" id="lr" value="0.0001" min="0" step="0.00001"> | |
| </div> | |
| <div class="form-group" style="margin-top: 5px;"> | |
| <label for="warmup-steps" data-tooltip="Learning rate warmup steps">Warmup Steps:</label> | |
| <input type="number" id="warmup-steps" value="2000" min="0"> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Hardware Settings --> | |
| <section class="config-section"> | |
| <h3>Hardware</h3> | |
| <div class="form-grid"> | |
| <div class="form-group" data-tooltip="Number of GPUs"> | |
| <label for="num-gpus">Number of GPUs:</label> | |
| <input type="number" id="num-gpus" value="8" min="1" max="1024"> | |
| </div> | |
| <div class="form-group" data-tooltip="GPU model and memory per GPU"> | |
| <label for="gpu-model">GPU Model:</label> | |
| <!-- Each option's value is the card's memory in GB and must agree with the number shown in its label. --> | |
| <select id="gpu-model"> | |
| <option value="24">RTX 4090 - 24GB</option> | |
| <option value="32">V100 - 32GB</option> | |
| <option value="40">A100 - 40GB</option> | |
| <option value="80" selected>A100 - 80GB / H100 - 80GB</option> | |
| <option value="141">H200 - 141GB</option> | |
| <option value="192">B200 - 192GB</option> | |
| <option value="custom">Custom</option> | |
| </select> | |
| <!-- Starts hidden; presumably revealed by the page script when "Custom" is chosen. It has no visible label, so it carries an aria-label. --> | |
| <input type="number" id="gpu-mem-custom" value="80" min="1" style="display:none" aria-label="Custom GPU memory (GB)"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Calculate Buttons --> | |
| <!-- Explicit type="button": these trigger in-page actions and must never act as submit buttons if a form wrapper is added later. --> | |
| <div class="button-group"> | |
| <button id="calculate-btn" class="btn-primary" type="button">Calculate</button> | |
| <button id="reset-btn" class="btn-secondary" type="button">Reset</button> | |
| </div> | |
| </div> | |
| <!-- Results Panel --> | |
| <div class="results-panel"> | |
| <h2>Results</h2> | |
| <div class="result-card"> | |
| <h3>Memory Breakdown</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Per GPU:</span> | |
| <span class="metric-value" id="result-per-gpu">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Total All GPUs:</span> | |
| <span class="metric-value" id="result-total">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">CPU Memory:</span> | |
| <span class="metric-value" id="result-cpu">-- GB</span> | |
| </div> | |
| </div> | |
| <div class="result-card"> | |
| <h3>Component Breakdown</h3> | |
| <div class="breakdown-item"> | |
| <span class="breakdown-label">Model Parameters:</span> | |
| <span class="breakdown-value" id="breakdown-params">-- GB</span> | |
| </div> | |
| <div class="breakdown-item"> | |
| <span class="breakdown-label">Gradients:</span> | |
| <span class="breakdown-value" id="breakdown-grads">-- GB</span> | |
| </div> | |
| <div class="breakdown-item"> | |
| <span class="breakdown-label">Optimizer States:</span> | |
| <span class="breakdown-value" id="breakdown-optimizer">-- GB</span> | |
| </div> | |
| <div class="breakdown-item"> | |
| <span class="breakdown-label">Activations:</span> | |
| <span class="breakdown-value" id="breakdown-activations">-- GB</span> | |
| </div> | |
| <div class="breakdown-item"> | |
| <span class="breakdown-label">Overhead:</span> | |
| <span class="breakdown-value" id="breakdown-overhead">-- GB</span> | |
| </div> | |
| <!-- Simple bar chart --> | |
| <div class="bar-chart" id="breakdown-chart"> | |
| <div class="bar" id="bar-params" style="width: 0%" title="Model Parameters"></div> | |
| <div class="bar" id="bar-grads" style="width: 0%" title="Gradients"></div> | |
| <div class="bar" id="bar-optimizer" style="width: 0%" title="Optimizer States"></div> | |
| <div class="bar" id="bar-activations" style="width: 0%" title="Activations"></div> | |
| </div> | |
| <div class="chart-legend"> | |
| <span class="legend-item"><span class="legend-color params"></span>Params</span> | |
| <span class="legend-item"><span class="legend-color grads"></span>Grads</span> | |
| <span class="legend-item"><span class="legend-color optimizer"></span>Opt</span> | |
| <span class="legend-item"><span class="legend-color activations"></span>Act</span> | |
| </div> | |
| </div> | |
| <div class="result-card"> | |
| <h3>Feasibility</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Status:</span> | |
| <span class="metric-value" id="feasibility-status">--</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Utilization:</span> | |
| <span class="metric-value" id="feasibility-util">--%</span> | |
| </div> | |
| <div class="metric" id="recommended-batch-container" style="display:none"> | |
| <span class="metric-label">Recommended Batch:</span> | |
| <span class="metric-value" id="recommended-batch">--</span> | |
| </div> | |
| </div> | |
| <!-- Formula card: the description shows a prompt until a calculation runs; the component list and references start hidden (display:none). --> | |
| <div class="result-card"> | |
| <h3>Formula Explanation</h3> | |
| <div id="formula-description" class="formula-description"> | |
| <p>Run a calculation to see the formula breakdown.</p> | |
| </div> | |
| <div id="formula-components" style="display:none;"> | |
| <!-- Formula components will be inserted here --> | |
| </div> | |
| <div class="formula-references" style="display:none;"> | |
| <h4>References:</h4> | |
| <ul id="references-list"></ul> | |
| </div> | |
| <button id="show-formula-btn" class="btn-secondary" type="button" style="margin-top: 10px; width: 100%;"> | |
| Show Formula Details | |
| </button> | |
| </div> | |
| <!-- Config actions; explicit type="button" keeps them from acting as submit buttons. --> | |
| <div class="button-group"> | |
| <button id="save-config-btn" class="btn-secondary" type="button">Save Config</button> | |
| <button id="copy-json-btn" class="btn-secondary" type="button">Copy JSON</button> | |
| <button id="export-framework-btn" class="btn-secondary" type="button">⬇️ Export Framework Config</button> | |
| </div> | |
| </div> | |
| </div><!-- End Training Tab --> | |
| <!-- Inference Tab --> | |
| <div id="inference-tab" class="tab-content" style="display:none;"> | |
| <div class="config-panel"> | |
| <h2>Inference Configuration</h2> | |
| <!-- Model Settings --> | |
| <section class="config-section"> | |
| <h3>Model Settings</h3> | |
| <!-- Inference preset picker; it lists the same preset keys as the training tab's #preset-select. --> | |
| <div class="form-group"> | |
| <label for="inference-preset-select">Preset Model:</label> | |
| <select id="inference-preset-select"> | |
| <option value="custom">Custom</option> | |
| <optgroup label="Dense Models"> | |
| <option value="llama2-7b">LLaMA 2 7B</option> | |
| <option value="llama2-13b">LLaMA 2 13B</option> | |
| <option value="llama2-70b">LLaMA 2 70B</option> | |
| <option value="gpt3-175b">GPT-3 175B</option> | |
| </optgroup> | |
| <optgroup label="MoE (Mixture of Experts) Models"> | |
| <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option> | |
| <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option> | |
| <option value="glm-4-9b">GLM-4 9B (MoE)</option> | |
| <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option> | |
| <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option> | |
| <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option> | |
| </optgroup> | |
| </select> | |
| </div> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="inference-model-name">Model Name:</label> | |
| <input type="text" id="inference-model-name" value="custom-model"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-num-params">Parameters:</label> | |
| <input type="text" id="inference-num-params" value="7B" placeholder="e.g., 7B"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-num-layers">Layers:</label> | |
| <input type="number" id="inference-num-layers" value="32" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-hidden-size">Hidden Size:</label> | |
| <input type="number" id="inference-hidden-size" value="4096" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-num-heads">Attention Heads:</label> | |
| <input type="number" id="inference-num-heads" value="32" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-vocab-size">Vocab Size:</label> | |
| <input type="number" id="inference-vocab-size" value="32000" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-seq-len">Max Seq Length:</label> | |
| <input type="number" id="inference-seq-len" value="4096" min="1"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Inference Settings --> | |
| <section class="config-section"> | |
| <h3>Inference Settings</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="inference-engine" data-tooltip="Inference engine to use">Inference Engine:</label> | |
| <select id="inference-engine"> | |
| <option value="huggingface">HuggingFace Transformers</option> | |
| <option value="vllm" selected>vLLM (Recommended)</option> | |
| <option value="tgi">TGI (HuggingFace TGI)</option> | |
| <option value="tensorrt_llm">TensorRT-LLM</option> | |
| <option value="sglang">SGLang</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-batch-size">Batch Size:</label> | |
| <input type="number" id="inference-batch-size" value="32" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="kv-cache-quantization" data-tooltip="KV cache quantization type">KV Cache Quantization:</label> | |
| <!-- Ratios are stated against the FP16 baseline named in the first option: 8-bit formats halve the cache, 4-bit quarters it. NOTE(review): confirm the calculator backend applies the same ratios. --> | |
| <select id="kv-cache-quantization"> | |
| <option value="none" selected>NONE (FP16)</option> | |
| <option value="int8">INT8 (2x compression)</option> | |
| <option value="fp8">FP8 (2x compression)</option> | |
| <option value="int4">INT4 (4x compression)</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="tensor-parallel-size" data-tooltip="Number of GPUs for tensor parallelism">Tensor Parallel Size:</label> | |
| <input type="number" id="tensor-parallel-size" value="1" min="1" max="8"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="gpu-memory-util" data-tooltip="GPU memory utilization (0.0-1.0)">GPU Memory Utilization:</label> | |
| <input type="range" id="gpu-memory-util" min="0.5" max="0.95" step="0.05" value="0.9"> | |
| <span id="gpu-memory-util-value">0.90</span> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-gpu-model">GPU Model:</label> | |
| <select id="inference-gpu-model"> | |
| <option value="24">RTX 4090 - 24GB</option> | |
| <option value="32">V100 - 32GB</option> | |
| <option value="40">A100 - 40GB</option> | |
| <option value="80" selected>A100 - 80GB / H100 - 80GB</option> | |
| <option value="141">H200 - 141GB</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="inference-num-gpus">Number of GPUs:</label> | |
| <input type="number" id="inference-num-gpus" value="1" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="use-kv-cache">Enable KV Cache:</label> | |
| <input type="checkbox" id="use-kv-cache" checked> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- TGI-specific Settings --> | |
| <section class="config-section" id="tgi-settings" style="display:none;"> | |
| <h3>TGI-Specific Settings</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="max-total-tokens" data-tooltip="Most important: defines memory budget (input + output)">Max Total Tokens:</label> | |
| <input type="number" id="max-total-tokens" value="4096" min="1" placeholder="e.g., 4096"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="max-input-tokens">Max Input Tokens:</label> | |
| <input type="number" id="max-input-tokens" value="2048" min="1" placeholder="e.g., 2048"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="max-batch-total-tokens">Max Batch Total Tokens:</label> | |
| <input type="number" id="max-batch-total-tokens" value="8192" min="1" placeholder="e.g., 8192"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="tgi-quantize">Weight Quantization:</label> | |
| <select id="tgi-quantize"> | |
| <option value="none" selected>NONE</option> | |
| <option value="awq">AWQ</option> | |
| <option value="eetq">EETQ</option> | |
| <option value="exl2">EXL2</option> | |
| <option value="gptq">GPTQ</option> | |
| <option value="marlin">Marlin</option> | |
| <option value="bitsandbytes">BitsAndBytes (8-bit)</option> | |
| <option value="bitsandbytes-nf4">BitsAndBytes NF4</option> | |
| <option value="bitsandbytes-fp4">BitsAndBytes FP4</option> | |
| <option value="fp8">FP8</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="tgi-dtype">Data Type:</label> | |
| <select id="tgi-dtype"> | |
| <option value="float16">Float16</option> | |
| <option value="bfloat16" selected>BFloat16</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="sharded">Enable Sharded:</label> | |
| <input type="checkbox" id="sharded"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="num-shard">Number of Shards:</label> | |
| <input type="number" id="num-shard" value="1" min="1" placeholder="Auto if empty"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- vLLM-specific Settings --> | |
| <section class="config-section" id="vllm-settings" style="display:none;"> | |
| <h3>vLLM-Specific Settings</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="block-size" data-tooltip="Block size for paged KV cache (default: 16)">Block Size:</label> | |
| <select id="block-size"> | |
| <option value="">Auto (16)</option> | |
| <option value="1">1</option> | |
| <option value="8">8</option> | |
| <option value="16" selected>16</option> | |
| <option value="32">32</option> | |
| <option value="64">64</option> | |
| <option value="128">128</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="swap-space-gb">CPU Swap Space (GB):</label> | |
| <input type="number" id="swap-space-gb" value="0" min="0" step="0.1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="enable-prefix-caching">Enable Prefix Caching:</label> | |
| <input type="checkbox" id="enable-prefix-caching"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="enforce-eager">Enable Eager Mode:</label> | |
| <input type="checkbox" id="enforce-eager"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="max-num-batched-tokens">Max Batched Tokens:</label> | |
| <input type="number" id="max-num-batched-tokens" placeholder="Auto if empty"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="max-num-seqs">Max Sequences per Batch:</label> | |
| <input type="number" id="max-num-seqs" placeholder="Auto if empty"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="vllm-quantization">Weight Quantization:</label> | |
| <select id="vllm-quantization"> | |
| <option value="none" selected>NONE</option> | |
| <option value="awq">AWQ</option> | |
| <option value="gptq">GPTQ</option> | |
| <option value="squeezellm">SqueezeLLM</option> | |
| <option value="fp8">FP8</option> | |
| </select> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- TensorRT-LLM-specific Settings --> | |
| <section class="config-section" id="tensorrt-settings" style="display:none;"> | |
| <h3>TensorRT-LLM-Specific Settings</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="trt-max-batch-size">Max Batch Size:</label> | |
| <input type="number" id="trt-max-batch-size" value="2048" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="trt-max-input-len">Max Input Length:</label> | |
| <input type="number" id="trt-max-input-len" value="1024" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="trt-max-seq-len">Max Sequence Length:</label> | |
| <input type="number" id="trt-max-seq-len" value="2048" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="trt-max-beam-width">Max Beam Width:</label> | |
| <input type="number" id="trt-max-beam-width" value="1" min="1"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- SGLang-specific settings. The section starts hidden through its inline display:none style. NOTE(review): presumably app.js reveals it when SGLang is the chosen inference engine; confirm against the script. --> | |
| <section class="config-section" id="sglang-settings" style="display:none;"> | |
| <h3>SGLang-Specific Settings</h3> | |
| <div class="form-grid"> | |
| <!-- Numeric limits: number inputs with min="1", each pre-filled with the same default its placeholder shows --> | |
| <div class="form-group"> | |
| <label for="chunk-size" data-tooltip="Prefill chunk size for long contexts (default: 8192)">Chunk Size:</label> | |
| <input type="number" id="chunk-size" value="8192" min="1" placeholder="e.g., 8192"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="max-running-requests" data-tooltip="Maximum number of concurrent requests">Max Running Requests:</label> | |
| <input type="number" id="max-running-requests" value="128" min="1" placeholder="e.g., 128"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="radix-cache-max-seq-len" data-tooltip="Maximum sequence length for RadixCache">RadixCache Max Seq Len:</label> | |
| <input type="number" id="radix-cache-max-seq-len" value="8192" min="1" placeholder="e.g., 8192"> | |
| </div> | |
| <!-- Backend choices: the option marked selected is the initial value of each dropdown --> | |
| <div class="form-group"> | |
| <label for="attention-backend" data-tooltip="Attention backend implementation">Attention Backend:</label> | |
| <select id="attention-backend"> | |
| <option value="flashinfer" selected>FlashInfer</option> | |
| <option value="triton">Triton</option> | |
| <option value="torch">Torch</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="speculative-algo" data-tooltip="Speculative decoding algorithm">Speculative Algorithm:</label> | |
| <select id="speculative-algo"> | |
| <option value="default" selected>Default</option> | |
| <option value="medusa">Medusa</option> | |
| <option value="eagle">EAGLE</option> | |
| </select> | |
| </div> | |
| <!-- Feature toggles: plain checkboxes, all unchecked on load (none carries the checked attribute) and none has a data-tooltip --> | |
| <div class="form-group"> | |
| <label for="disable-radix-cache">Disable RadixCache:</label> | |
| <input type="checkbox" id="disable-radix-cache"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="enable-p2p">Enable P2P Attention:</label> | |
| <input type="checkbox" id="enable-p2p"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="disable-custom-all-reduce">Disable Custom All-Reduce:</label> | |
| <input type="checkbox" id="disable-custom-all-reduce"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="enable-torch-compile">Enable torch.compile:</label> | |
| <input type="checkbox" id="enable-torch-compile"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="multi-lora-enabled">Enable Multi-LoRA:</label> | |
| <input type="checkbox" id="multi-lora-enabled"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Inference actions. type="button" is explicit because a button without a type defaults to submit, which would submit any enclosing form. No inline handlers are used; NOTE(review): behaviour is presumably attached by app.js through these ids. --> | |
| <div class="button-group"> | |
| <button id="inference-calculate-btn" class="btn-primary" type="button">Calculate Inference Memory</button> | |
| <button id="inference-reset-btn" class="btn-secondary" type="button">Reset</button> | |
| </div> | |
| </div> | |
| <!-- Inference results panel. Every metric value is a span with a unique id whose initial text is a double-dash placeholder. NOTE(review): presumably app.js overwrites these spans by id after a calculation; confirm against the script. --> | |
| <div class="results-panel"> | |
| <h2>Inference Results</h2> | |
| <!-- Card 1: memory figures, all shown with a GB unit in the placeholder text --> | |
| <div class="result-card"> | |
| <h3>Memory Breakdown</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Per GPU:</span> | |
| <span class="metric-value" id="inference-result-per-gpu">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Total All GPUs:</span> | |
| <span class="metric-value" id="inference-result-total">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Model Parameters:</span> | |
| <span class="metric-value" id="inference-result-params">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">KV Cache:</span> | |
| <span class="metric-value" id="inference-result-kv-cache">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Activations:</span> | |
| <span class="metric-value" id="inference-result-activations">-- GB</span> | |
| </div> | |
| </div> | |
| <!-- Card 2: performance figures; the unit differs per metric (none, tokens/sec, none, percent) --> | |
| <div class="result-card"> | |
| <h3>Performance Estimates</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Max Batch Size:</span> | |
| <span class="metric-value" id="inference-max-batch">--</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Estimated Throughput:</span> | |
| <span class="metric-value" id="inference-throughput">-- tokens/sec</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Fits on GPU:</span> | |
| <span class="metric-value" id="inference-fits">--</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Utilization:</span> | |
| <span class="metric-value" id="inference-utilization">--%</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div><!-- End Inference Tab --> | |
| <!-- Multi-Node Tab --> | |
| <div id="multinode-tab" class="tab-content" style="display:none;"> | |
| <div class="config-panel"> | |
| <h2>Multi-Node Training Configuration</h2> | |
| <p class="info-text">Calculate network communication overhead for distributed training across multiple nodes.</p> | |
| <!-- Model settings for the multi-node tab: a preset dropdown, a free-text parameter count and a precision dropdown. Every control id carries the multinode- prefix. --> | |
| <section class="config-section"> | |
| <h3>Model Settings</h3> | |
| <div class="form-group"> | |
| <label for="multinode-preset-select">Preset Model:</label> | |
| <!-- NOTE(review): option values look like preset keys consumed by app.js; keep them unchanged when editing the visible labels. --> | |
| <select id="multinode-preset-select"> | |
| <option value="custom">Custom</option> | |
| <optgroup label="Dense Models"> | |
| <option value="llama2-7b">LLaMA 2 7B</option> | |
| <option value="llama2-13b">LLaMA 2 13B</option> | |
| <option value="llama2-70b">LLaMA 2 70B</option> | |
| <option value="gpt3-175b">GPT-3 175B</option> | |
| </optgroup> | |
| <optgroup label="MoE (Mixture of Experts) Models"> | |
| <!-- The star in the two GLM labels is the literal U+2B50 character; the document declares UTF-8 in its meta charset. --> | |
| <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option> | |
| <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option> | |
| <option value="glm-4-9b">GLM-4 9B (MoE)</option> | |
| <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option> | |
| <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option> | |
| <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option> | |
| </optgroup> | |
| </select> | |
| </div> | |
| <div class="form-grid"> | |
| <!-- Parameter count is a text input (default "7B"), not a number input, so suffixed values can be typed --> | |
| <div class="form-group"> | |
| <label for="multinode-num-params">Parameters:</label> | |
| <input type="text" id="multinode-num-params" value="7B" placeholder="e.g., 7B"> | |
| </div> | |
| <!-- Precision defaults to BF16 (the option marked selected) --> | |
| <div class="form-group"> | |
| <label for="multinode-dtype">Precision:</label> | |
| <select id="multinode-dtype"> | |
| <option value="bf16" selected>BF16</option> | |
| <option value="fp16">FP16</option> | |
| <option value="fp32">FP32</option> | |
| </select> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Node configuration: cluster shape and inter-node network. Defaults are 2 nodes with 8 GPUs each, and the static Total GPUs text below (16) matches that product. --> | |
| <section class="config-section"> | |
| <h3>Node Configuration</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="num-nodes">Number of Nodes:</label> | |
| <input type="number" id="num-nodes" value="2" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="gpus-per-node">GPUs per Node:</label> | |
| <input type="number" id="gpus-per-node" value="8" min="1"> | |
| </div> | |
| <!-- Interconnect defaults to InfiniBand. NOTE(review): the NVLink label is given in Gbps, while NVLink bandwidth is usually quoted in GB/s; confirm the intended unit against the calculator backend. --> | |
| <div class="form-group"> | |
| <label for="interconnect-type" data-tooltip="Network interconnect between nodes">Interconnect Type:</label> | |
| <select id="interconnect-type"> | |
| <option value="infiniband" selected>InfiniBand (200 Gbps)</option> | |
| <option value="nvlink">NVLink (900 Gbps)</option> | |
| <option value="ethernet_200g">Ethernet 200G</option> | |
| <option value="ethernet_100g">Ethernet 100G</option> | |
| <option value="ethernet_25g">Ethernet 25G</option> | |
| <option value="ethernet_10g">Ethernet 10G</option> | |
| </select> | |
| </div> | |
| </div> | |
| <!-- NOTE(review): presumably app.js rewrites the span below when either number input changes; confirm against the script. --> | |
| <p class="info-text">Total GPUs: <span id="multinode-total-gpus">16</span></p> | |
| </section> | |
| <!-- Training configuration for the multi-node estimate. DeepSpeed ZeRO and stage 3 are the preselected options; batch size and sequence length default to 4 and 4096. --> | |
| <section class="config-section"> | |
| <h3>Training Configuration</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="multinode-engine">Training Engine:</label> | |
| <select id="multinode-engine"> | |
| <option value="pytorch_ddp">PyTorch DDP</option> | |
| <option value="deepspeed" selected>DeepSpeed ZeRO</option> | |
| <option value="megatron_lm">Megatron-LM</option> | |
| <option value="fsdp">PyTorch FSDP</option> | |
| </select> | |
| </div> | |
| <!-- This is the only form-group in the section with its own id. NOTE(review): presumably so app.js can show or hide the ZeRO stage picker depending on the engine; confirm. --> | |
| <div class="form-group" id="multinode-zero-stage-group"> | |
| <label for="multinode-zero-stage">ZeRO Stage:</label> | |
| <select id="multinode-zero-stage"> | |
| <option value="1">1: Shard optimizer states</option> | |
| <option value="2">2: Shard optimizer + gradients</option> | |
| <option value="3" selected>3: Shard everything</option> | |
| </select> | |
| </div> | |
| <!-- Per-GPU batch size and sequence length: number inputs constrained by min="1" --> | |
| <div class="form-group"> | |
| <label for="multinode-batch-size">Batch Size per GPU:</label> | |
| <input type="number" id="multinode-batch-size" value="4" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="multinode-seq-len">Sequence Length:</label> | |
| <input type="number" id="multinode-seq-len" value="4096" min="1"> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Parallelism strategy: tensor and pipeline degrees (both default 1, min 1) plus two checkbox options. Note that both degree inputs use ids ending in -pp. --> | |
| <section class="config-section"> | |
| <h3>Parallelism Strategy</h3> | |
| <div class="form-grid"> | |
| <div class="form-group"> | |
| <label for="multinode-tensor-pp">Tensor Parallel:</label> | |
| <input type="number" id="multinode-tensor-pp" value="1" min="1"> | |
| </div> | |
| <div class="form-group"> | |
| <label for="multinode-pipeline-pp">Pipeline Parallel:</label> | |
| <input type="number" id="multinode-pipeline-pp" value="1" min="1"> | |
| </div> | |
| <!-- Each checkbox below is nested inside its label, which also references it through the for attribute. Sequence Parallel starts unchecked. --> | |
| <div class="form-group"> | |
| <label for="multinode-seq-parallel"> | |
| <input type="checkbox" id="multinode-seq-parallel"> | |
| Enable Sequence Parallel | |
| </label> | |
| </div> | |
| <!-- Auto-optimize starts checked --> | |
| <div class="form-group"> | |
| <label for="multinode-optimize"> | |
| <input type="checkbox" id="multinode-optimize" checked> | |
| Auto-optimize Strategy | |
| </label> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Multi-node actions. type="button" is explicit because a button without a type defaults to submit, which would submit any enclosing form. No inline handlers are used; NOTE(review): behaviour is presumably attached by app.js through these ids. --> | |
| <div class="button-group"> | |
| <button id="multinode-calculate-btn" class="btn-primary" type="button">Calculate Network Overhead</button> | |
| <button id="multinode-reset-btn" class="btn-secondary" type="button">Reset</button> | |
| </div> | |
| </div> | |
| <!-- Multi-node results panel. Each metric value is a span with a unique multinode- id whose initial text is a double-dash placeholder plus its unit. NOTE(review): presumably app.js fills these by id after a calculation; confirm against the script. --> | |
| <div class="results-panel"> | |
| <h2>Multi-Node Results</h2> | |
| <!-- Card 1: communication volume, total and per collective, with GB as the placeholder unit --> | |
| <div class="result-card"> | |
| <h3>Network Overhead</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Total Overhead:</span> | |
| <span class="metric-value" id="multinode-overhead-total">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">AllReduce:</span> | |
| <span class="metric-value" id="multinode-overhead-allreduce">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">AllGather:</span> | |
| <span class="metric-value" id="multinode-overhead-allgather">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">ReduceScatter:</span> | |
| <span class="metric-value" id="multinode-overhead-reducescatter">-- GB</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Pipeline Comm:</span> | |
| <span class="metric-value" id="multinode-overhead-pipeline">-- GB</span> | |
| </div> | |
| </div> | |
| <!-- Card 2: timing figures; the first two are per training step (ms/step), latency is plain ms --> | |
| <div class="result-card"> | |
| <h3>Time Impact</h3> | |
| <div class="metric"> | |
| <span class="metric-label">Est. Overhead:</span> | |
| <span class="metric-value" id="multinode-time-overhead">-- ms/step</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Communication Time:</span> | |
| <span class="metric-value" id="multinode-comm-time">-- ms/step</span> | |
| </div> | |
| <div class="metric"> | |
| <span class="metric-label">Latency Impact:</span> | |
| <span class="metric-value" id="multinode-latency">-- ms</span> | |
| </div> | |
| </div> | |
| <!-- Card 3: container for suggestions; it holds a single instructional paragraph until a calculation is run. NOTE(review): presumably its children are replaced by the script. --> | |
| <div class="result-card"> | |
| <h3>Optimization Suggestions</h3> | |
| <div id="multinode-suggestions"> | |
| <p>Run calculation to see optimization suggestions.</p> | |
| </div> | |
| </div> | |
| </div> | |
| </div><!-- End Multi-Node Tab --> | |
| </div> | |
| <!-- Page-level error banner, empty and hidden on load. role="alert" lets assistive technology announce the message when the banner is populated and shown. --> | |
| <div id="error-message" class="error-message" style="display:none" role="alert"></div> | |
| </div> | |
| <!-- Application script. It sits after the page markup and has no defer or async attribute, so it executes once the elements above have been parsed. --> | |
| <script src="/static/js/app.js"></script> | |
| </body> | |
| </html> | |