Spaces:

george614
/

gpu-memory-calculator

Running

gpu-memory-calculator / web /templates /index.html

George Yang

Feat: Sync all features from main repository

e9c64c8 3 months ago

62.2 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	  <!-- Document metadata: encoding, responsive viewport and page title. -->
	  <meta charset="UTF-8">
	  <meta name="viewport" content="width=device-width, initial-scale=1.0">
	  <title>GPU Memory Calculator for LLM Training</title>
	  <!-- Stylesheet is referenced by a root-relative path. -->
	  <link href="/static/css/styles.css" rel="stylesheet">
	</head>
	<body>
	<div class="container">
	<header>
	  <!-- Page banner: application title followed by a one-line scope summary. -->
	  <h1>🚀 GPU Memory Calculator</h1>
	  <p class="subtitle">For LLM Training, Inference, and Multi-Node Optimization</p>
	</header>

	<!-- Tab navigation: one button per calculator mode. Each data-tab value
	     (training, inference, multinode) is the prefix of a panel id of the
	     form NAME-tab further down. The script that switches panels is not
	     part of this template chunk. -->
	<nav class="tab-navigation">
	  <!-- type="button" is explicit (matching the HF fetch buttons) so these
	       can never act as submit buttons if the markup is wrapped in a form. -->
	  <button class="tab-btn active" data-tab="training" type="button">🎓 Training</button>
	  <button class="tab-btn" data-tab="inference" type="button">🚀 Inference</button>
	  <button class="tab-btn" data-tab="multinode" type="button">🌐 Multi-Node</button>
	</nav>

	<div class="main-content">
	<!-- Training Tab -->
	<div id="training-tab" class="tab-content active">
	<!-- Configuration Panel -->
	<div class="config-panel">
	<h2>Training Configuration</h2>

	<!-- Model Settings (training tab): preset picker, optional HuggingFace
	     fetch panel, and the manual architecture fields. -->
	<section class="config-section">
	  <h3>Model Settings</h3>
	  <div class="form-group">
	    <label for="preset-select">Preset Model:</label>
	    <div class="preset-row">
	      <select id="preset-select">
	        <option value="custom">Custom</option>
	        <optgroup label="Dense Models">
	          <option value="llama2-7b">LLaMA 2 7B</option>
	          <option value="llama2-13b">LLaMA 2 13B</option>
	          <option value="llama2-70b">LLaMA 2 70B</option>
	          <option value="gpt3-175b">GPT-3 175B</option>
	        </optgroup>
	        <optgroup label="MoE (Mixture of Experts) Models">
	          <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
	          <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
	          <option value="glm-4-9b">GLM-4 9B (MoE)</option>
	          <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
	          <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
	          <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
	        </optgroup>
	      </select>
	      <button id="fetch-hf-btn" class="btn-tertiary" title="Fetch from HuggingFace Hub" type="button">
	        <span>🤗 Fetch from HF</span>
	      </button>
	    </div>
	  </div>

	  <!-- HF fetch panel: starts hidden through the inline display:none. -->
	  <div id="hf-fetch-panel" style="display: none;" class="hf-fetch-panel">
	    <div class="form-group">
	      <label for="hf-model-id">HuggingFace Model ID:</label>
	      <input id="hf-model-id" type="text" placeholder="e.g., meta-llama/Llama-2-7b-hf" aria-describedby="hf-model-help">
	      <span id="hf-model-help" class="help-text">Enter the HuggingFace model repository ID (e.g., meta-llama/Llama-2-7b-hf)</span>
	    </div>
	    <div class="form-group">
	      <label for="hf-token">HF Token (optional, for private models):</label>
	      <input id="hf-token" type="password" placeholder="hf_xxxxxxxxxxxx" aria-describedby="hf-token-help">
	      <span id="hf-token-help" class="help-text">Leave empty for public models, provide token for gated/private models</span>
	    </div>
	    <div class="button-group">
	      <button id="hf-fetch-submit" class="btn-primary" type="button">Fetch Model</button>
	      <button id="hf-fetch-cancel" class="btn-secondary" type="button">Cancel</button>
	    </div>
	    <div id="hf-loading" style="display: none;" class="loading-message">
	      <p>Fetching model from HuggingFace Hub...</p>
	    </div>
	    <!-- Status regions. aria-live only accepts "off", "polite" or
	         "assertive"; the previous misspelled values were invalid tokens, so
	         assistive technology would not announce the messages. -->
	    <div id="hf-error" style="display: none;" class="error-message" aria-live="polite"></div>
	    <div id="hf-success" style="display: none;" class="success-message" aria-live="polite"></div>
	  </div>

	  <!-- Manual architecture fields; each wrapper carries its hint in data-tooltip. -->
	  <div class="form-grid">
	    <div class="form-group" data-tooltip="Name of your model">
	      <label for="model-name" id="label-model-name">Model Name:</label>
	      <input id="model-name" type="text" value="custom-model" aria-labelledby="label-model-name">
	    </div>

	    <div class="form-group" data-tooltip="Total number of parameters (e.g., 7B, 7000M, 7000000000)">
	      <label for="num-params" id="label-num-params">Parameters:</label>
	      <input id="num-params" type="text" value="7B" placeholder="e.g., 7B" aria-labelledby="label-num-params" aria-describedby="help-num-params">
	      <span id="help-num-params" class="sr-only">Enter model size as number with optional suffix: 7B, 7000M, or 7000000000</span>
	    </div>

	    <div class="form-group" data-tooltip="Number of transformer layers">
	      <label for="num-layers" id="label-num-layers">Layers:</label>
	      <input id="num-layers" type="number" value="32" min="1" aria-labelledby="label-num-layers">
	    </div>

	    <div class="form-group" data-tooltip="Hidden dimension size">
	      <label for="hidden-size" id="label-hidden-size">Hidden Size:</label>
	      <input id="hidden-size" type="number" value="4096" min="1" aria-labelledby="label-hidden-size">
	    </div>

	    <div class="form-group" data-tooltip="Number of attention heads">
	      <label for="num-heads" id="label-num-heads">Attention Heads:</label>
	      <input id="num-heads" type="number" value="32" min="1" aria-labelledby="label-num-heads">
	    </div>

	    <div class="form-group" data-tooltip="Vocabulary size">
	      <label for="vocab-size" id="label-vocab-size">Vocab Size:</label>
	      <input id="vocab-size" type="number" value="32000" min="1" aria-labelledby="label-vocab-size">
	    </div>

	    <div class="form-group" data-tooltip="Maximum sequence length">
	      <label for="seq-len" id="label-seq-len">Max Seq Length:</label>
	      <input id="seq-len" type="number" value="4096" min="1" aria-labelledby="label-seq-len">
	    </div>
	  </div>
	</section>

	<!-- Mixture-of-Experts options. The detail fields sit inside #moe-fields,
	     which starts hidden (inline display:none). Presumably a script reveals
	     it when the checkbox is ticked; that script is not in this chunk. -->
	<section class="config-section">
	  <h3>Mixture of Experts (MoE)</h3>
	  <div class="form-group" data-tooltip="Enable Mixture of Experts architecture">
	    <label for="moe-enabled">
	      <input id="moe-enabled" type="checkbox">
	      Enable MoE
	    </label>
	  </div>

	  <div id="moe-fields" style="display:none;">
	    <div class="form-grid">
	      <div class="form-group" data-tooltip="Total number of experts in the model">
	        <label for="num-experts">Number of Experts:</label>
	        <input id="num-experts" type="number" value="8" min="1" max="256">
	      </div>

	      <div class="form-group" data-tooltip="Number of experts activated per token (top-k routing)">
	        <label for="top-k">Top-K (active experts):</label>
	        <input id="top-k" type="number" value="2" min="1" max="8">
	      </div>

	      <div class="form-group" data-tooltip="Expert intermediate layer size (default: 4x hidden_size)">
	        <label for="expert-intermediate-size">Expert Intermediate Size:</label>
	        <input id="expert-intermediate-size" type="number" value="" placeholder="Auto (4x hidden)" min="1">
	      </div>

	      <div class="form-group" data-tooltip="Shared expert intermediate size (for models like GLM)">
	        <label for="shared-expert-size">Shared Expert Size:</label>
	        <input id="shared-expert-size" type="number" value="" placeholder="None" min="1">
	      </div>
	    </div>
	    <!-- The two spans start at the same values as the Top-K and Number of Experts defaults above. -->
	    <p class="info-text">With MoE, only <strong><span id="active-experts-display">2</span></strong> of <strong><span id="total-experts-display">8</span></strong> experts are active per token, reducing activation memory.</p>
	  </div>
	</section>

	<!-- Training Settings: batch size, gradient accumulation, optimizer,
	     precision and activation checkpointing level. -->
	<section class="config-section">
	  <h3>Training Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="batch-size" data-tooltip="Batch size per GPU">Batch Size:</label>
	      <input id="batch-size" type="number" value="4" min="1">
	      <!-- The visible label targets the number input only, so the range
	           control gets its own accessible name. -->
	      <input id="batch-size-slider" type="range" min="1" max="128" value="4" aria-label="Batch size">
	    </div>

	    <div class="form-group" data-tooltip="Gradient accumulation steps">
	      <label for="grad-accum">Gradient Accumulation:</label>
	      <input id="grad-accum" type="number" value="4" min="1">
	    </div>

	    <div class="form-group">
	      <label for="optimizer" data-tooltip="Optimizer type">Optimizer:</label>
	      <select id="optimizer">
	        <option value="adamw">AdamW</option>
	        <option value="adam">Adam</option>
	        <option value="sgd">SGD</option>
	        <option value="adamw_8bit">AdamW 8-bit</option>
	      </select>
	    </div>

	    <div class="form-group">
	      <label for="dtype" data-tooltip="Data type for training">Precision:</label>
	      <select id="dtype">
	        <option value="bf16" selected>BF16</option>
	        <option value="fp16">FP16</option>
	        <option value="fp32">FP32</option>
	        <option value="int8">INT8</option>
	        <option value="int4">INT4</option>
	      </select>
	    </div>

	    <div class="form-group">
	      <label for="activation-checkpointing" data-tooltip="Activation checkpointing level (0=none, 4=full)">
	        Activation Checkpointing:
	      </label>
	      <select id="activation-checkpointing">
	        <option value="0">0: None (most memory)</option>
	        <option value="1">1: Checkpoint attention output</option>
	        <option value="2" selected>2: Checkpoint attention input</option>
	        <option value="3">3: Checkpoint layer + attention</option>
	        <option value="4">4: Full checkpointing (least memory)</option>
	      </select>
	    </div>
	  </div>
	</section>

	<!-- Parallelism: tensor, pipeline and data degrees plus a sequence-parallel toggle. -->
	<section class="config-section">
	  <h3>Parallelism</h3>
	  <div class="form-grid">
	    <div class="form-group" data-tooltip="Tensor parallelism degree">
	      <label for="tensor-pp">Tensor PP:</label>
	      <input id="tensor-pp" type="number" value="1" min="1" max="8">
	    </div>

	    <div class="form-group" data-tooltip="Pipeline parallelism degree">
	      <label for="pipeline-pp">Pipeline PP:</label>
	      <input id="pipeline-pp" type="number" value="1" min="1" max="16">
	    </div>

	    <div class="form-group" data-tooltip="Data parallelism degree">
	      <label for="data-pp">Data PP:</label>
	      <input id="data-pp" type="number" value="8" min="1">
	    </div>

	    <div class="form-group" data-tooltip="Enable sequence parallelism">
	      <label for="seq-parallel">
	        <input id="seq-parallel" type="checkbox">
	        Sequence Parallel
	      </label>
	    </div>
	  </div>
	  <!-- Initial text is 8; presumably recomputed by script from the degrees above (TODO confirm). -->
	  <p class="info-text">Effective GPUs: <span id="effective-gpus">8</span></p>
	</section>

	<!-- Training Engine: engine selector followed by engine-specific option
	     groups. The FSDP and Megatron groups start hidden (inline
	     display:none); the DeepSpeed groups are visible, matching the default
	     selection. Presumably a script toggles them on engine change. -->
	<section class="config-section">
	  <h3>Training Engine</h3>
	  <div class="form-group">
	    <label for="engine-type" data-tooltip="Training framework/engine">Engine Type:</label>
	    <select id="engine-type">
	      <option value="pytorch_ddp">PyTorch DDP</option>
	      <option value="deepspeed" selected>DeepSpeed ZeRO</option>
	      <option value="megatron_lm">Megatron-LM</option>
	      <option value="fsdp">PyTorch FSDP</option>
	      <option value="megatron_deepspeed">Megatron + DeepSpeed</option>
	    </select>
	  </div>

	  <div id="engine-options">
	    <!-- Dynamic fields based on engine type -->
	    <!-- DeepSpeed ZeRO options -->
	    <div class="form-group" id="zero-stage-group">
	      <label for="zero-stage" data-tooltip="DeepSpeed ZeRO stage (0-3)">ZeRO Stage:</label>
	      <select id="zero-stage">
	        <option value="0">0: Disabled</option>
	        <option value="1">1: Shard optimizer states</option>
	        <option value="2">2: Shard optimizer + gradients</option>
	        <option value="3" selected>3: Shard everything</option>
	      </select>
	    </div>

	    <div class="form-group" id="offload-opt-group">
	      <label for="offload-optimizer" data-tooltip="CPU offload for optimizer states">Offload Optimizer:</label>
	      <select id="offload-optimizer">
	        <option value="none">None</option>
	        <option value="cpu" selected>CPU</option>
	        <option value="nvme">NVMe</option>
	      </select>
	    </div>

	    <div class="form-group" id="offload-param-group">
	      <label for="offload-param" data-tooltip="CPU offload for parameters">Offload Parameters:</label>
	      <select id="offload-param">
	        <option value="none" selected>None</option>
	        <option value="cpu">CPU</option>
	        <option value="nvme">NVMe</option>
	      </select>
	    </div>

	    <!-- ZeRO-Init option -->
	    <div class="form-group" id="zero-init-group">
	      <label for="zero-init" data-tooltip="Use ZeRO initialization (reduces memory during init)">
	        <input id="zero-init" type="checkbox" checked>
	        ZeRO Init (ZeRO-3)
	      </label>
	    </div>

	    <!-- FSDP Sharding Strategy -->
	    <div class="form-group" id="sharding-strategy-group" style="display:none;">
	      <label for="sharding-strategy" data-tooltip="FSDP sharding strategy">Sharding Strategy:</label>
	      <select id="sharding-strategy">
	        <option value="no_shard">No Sharding (like DDP)</option>
	        <option value="shard_grad_op">Shard Gradients + Optimizer (ZeRO-2)</option>
	        <option value="full_shard" selected>Full Shard (ZeRO-3)</option>
	      </select>
	    </div>

	    <!-- Megatron-specific options -->
	    <div class="form-group" id="megatron-options" style="display:none;">
	      <label class="group-label">Megatron-LM Options:</label>
	      <div class="form-group" style="margin-top: 10px;">
	        <!-- The for attribute must name the checkbox this label wraps; it
	             previously pointed at an id that no element in this markup has. -->
	        <label for="use-distributed-optimizer" data-tooltip="Model parallelism strategy">
	          <input id="use-distributed-optimizer" type="checkbox">
	          Use Distributed Optimizer
	        </label>
	      </div>
	      <div class="form-group" style="margin-top: 5px;">
	        <label for="num-micro-batches" data-tooltip="Number of micro-batches for pipeline parallelism">
	          Num Micro-Batches (PP):
	          <input id="num-micro-batches" type="number" value="1" min="1" max="128">
	        </label>
	      </div>
	    </div>

	    <!-- Advanced Training Options -->
	    <div class="form-group" style="margin-top: 15px;">
	      <label class="group-label">Advanced Training Options:</label>

	      <div class="form-group" style="margin-top: 10px;">
	        <label for="gradient-clipping" data-tooltip="Gradient clipping threshold (0 = disabled)">
	          Gradient Clipping:
	          <input id="gradient-clipping" type="number" value="1.0" min="0" step="0.1">
	        </label>
	      </div>

	      <div class="form-group" style="margin-top: 5px;">
	        <label for="weight-decay" data-tooltip="Weight decay for regularization">Weight Decay:</label>
	        <input id="weight-decay" type="number" value="0.01" min="0" step="0.001">
	      </div>

	      <div class="form-group" style="margin-top: 5px;">
	        <label for="lr" data-tooltip="Learning rate (for reference)">Learning Rate:</label>
	        <input id="lr" type="number" value="0.0001" min="0" step="0.00001">
	      </div>

	      <div class="form-group" style="margin-top: 5px;">
	        <label for="warmup-steps" data-tooltip="Learning rate warmup steps">Warmup Steps:</label>
	        <input id="warmup-steps" type="number" value="2000" min="0">
	      </div>
	    </div>
	  </div>
	</section>

	<!-- Hardware: GPU count and per-GPU memory. Each numeric option value is
	     the memory size in GB shown in its label. -->
	<section class="config-section">
	  <h3>Hardware</h3>
	  <div class="form-grid">
	    <div class="form-group" data-tooltip="Number of GPUs">
	      <label for="num-gpus">Number of GPUs:</label>
	      <input id="num-gpus" type="number" value="8" min="1" max="1024">
	    </div>

	    <div class="form-group" data-tooltip="GPU model and memory per GPU">
	      <label for="gpu-model">GPU Model:</label>
	      <select id="gpu-model">
	        <!-- Was value="16", which disagreed with the 24GB label and with
	             the inference tab's RTX 4090 entry (value 24). -->
	        <option value="24">RTX 4090 - 24GB</option>
	        <option value="32">V100 - 32GB</option>
	        <option value="40">A100 - 40GB</option>
	        <option value="80" selected>A100 - 80GB / H100 - 80GB</option>
	        <option value="141">H200 - 141GB</option>
	        <option value="192">B200 - 192GB</option>
	        <option value="custom">Custom</option>
	      </select>
	      <!-- Starts hidden; presumably shown when "Custom" is chosen (TODO
	           confirm in script). It has no visible label, so it carries an
	           accessible name. -->
	      <input id="gpu-mem-custom" type="number" value="80" min="1" style="display:none" aria-label="Custom GPU memory in GB">
	    </div>
	  </div>
	</section>

	<!-- Training tab actions. type="button" is explicit, consistent with the
	     HF fetch buttons, so neither control can act as a submit button. -->
	<div class="button-group">
	  <button id="calculate-btn" class="btn-primary" type="button">Calculate</button>
	  <button id="reset-btn" class="btn-secondary" type="button">Reset</button>
	</div>
	</div>

	<!-- Results Panel -->
	<div class="results-panel">
	<h2>Results</h2>

	<div class="result-card">
	  <!-- Headline totals. Every value starts as the placeholder text "-- GB". -->
	  <h3>Memory Breakdown</h3>
	  <div class="metric">
	    <span class="metric-label">Per GPU:</span>
	    <span id="result-per-gpu" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Total All GPUs:</span>
	    <span id="result-total" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">CPU Memory:</span>
	    <span id="result-cpu" class="metric-value">-- GB</span>
	  </div>
	</div>

	<div class="result-card">
	  <!-- Per-component figures, then a stacked bar and its legend. -->
	  <h3>Component Breakdown</h3>
	  <div class="breakdown-item">
	    <span class="breakdown-label">Model Parameters:</span>
	    <span id="breakdown-params" class="breakdown-value">-- GB</span>
	  </div>
	  <div class="breakdown-item">
	    <span class="breakdown-label">Gradients:</span>
	    <span id="breakdown-grads" class="breakdown-value">-- GB</span>
	  </div>
	  <div class="breakdown-item">
	    <span class="breakdown-label">Optimizer States:</span>
	    <span id="breakdown-optimizer" class="breakdown-value">-- GB</span>
	  </div>
	  <div class="breakdown-item">
	    <span class="breakdown-label">Activations:</span>
	    <span id="breakdown-activations" class="breakdown-value">-- GB</span>
	  </div>
	  <div class="breakdown-item">
	    <span class="breakdown-label">Overhead:</span>
	    <span id="breakdown-overhead" class="breakdown-value">-- GB</span>
	  </div>

	  <!-- Bar chart: four segments, all starting at width 0%. Overhead has no segment. -->
	  <div class="bar-chart" id="breakdown-chart">
	    <div class="bar" id="bar-params" style="width: 0%" title="Model Parameters"></div>
	    <div class="bar" id="bar-grads" style="width: 0%" title="Gradients"></div>
	    <div class="bar" id="bar-optimizer" style="width: 0%" title="Optimizer States"></div>
	    <div class="bar" id="bar-activations" style="width: 0%" title="Activations"></div>
	  </div>
	  <div class="chart-legend">
	    <span class="legend-item"><span class="legend-color params"></span>Params</span>
	    <span class="legend-item"><span class="legend-color grads"></span>Grads</span>
	    <span class="legend-item"><span class="legend-color optimizer"></span>Opt</span>
	    <span class="legend-item"><span class="legend-color activations"></span>Act</span>
	  </div>
	</div>

	<div class="result-card">
	  <!-- Fit verdict and utilization. The recommended-batch row starts hidden. -->
	  <h3>Feasibility</h3>
	  <div class="metric">
	    <span class="metric-label">Status:</span>
	    <span id="feasibility-status" class="metric-value">--</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Utilization:</span>
	    <span id="feasibility-util" class="metric-value">--%</span>
	  </div>
	  <div class="metric" id="recommended-batch-container" style="display:none">
	    <span class="metric-label">Recommended Batch:</span>
	    <span id="recommended-batch" class="metric-value">--</span>
	  </div>
	</div>

	<div class="result-card">
	  <!-- Formula explanation: placeholder text first; the component and
	       reference containers start hidden and empty. -->
	  <h3>Formula Explanation</h3>
	  <div id="formula-description" class="formula-description">
	    <p>Run a calculation to see the formula breakdown.</p>
	  </div>
	  <div id="formula-components" style="display:none;">
	    <!-- Formula components will be inserted here -->
	  </div>
	  <div class="formula-references" style="display:none;">
	    <h4>References:</h4>
	    <ul id="references-list"></ul>
	  </div>
	  <!-- type="button" is explicit, consistent with the HF fetch buttons. -->
	  <button id="show-formula-btn" class="btn-secondary" type="button" style="margin-top: 10px; width: 100%;">
	    Show Formula Details
	  </button>
	</div>

	<div class="button-group">
	  <!-- Save / copy / export actions. type="button" is explicit, consistent
	       with the HF fetch buttons. -->
	  <button id="save-config-btn" class="btn-secondary" type="button">Save Config</button>
	  <button id="copy-json-btn" class="btn-secondary" type="button">Copy JSON</button>
	  <button id="export-framework-btn" class="btn-secondary" type="button">⬇️ Export Framework Config</button>
	</div>
	</div>
	</div><!-- End Training Tab -->

	<!-- Inference Tab -->
	<div id="inference-tab" class="tab-content" style="display:none;">
	<div class="config-panel">
	<h2>Inference Configuration</h2>

	<!-- Model Settings (inference tab): same preset list as the training tab,
	     with every control id prefixed "inference-". -->
	<section class="config-section">
	  <h3>Model Settings</h3>
	  <div class="form-group">
	    <label for="inference-preset-select">Preset Model:</label>
	    <select id="inference-preset-select">
	      <option value="custom">Custom</option>
	      <optgroup label="Dense Models">
	        <option value="llama2-7b">LLaMA 2 7B</option>
	        <option value="llama2-13b">LLaMA 2 13B</option>
	        <option value="llama2-70b">LLaMA 2 70B</option>
	        <option value="gpt3-175b">GPT-3 175B</option>
	      </optgroup>
	      <optgroup label="MoE (Mixture of Experts) Models">
	        <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
	        <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
	        <option value="glm-4-9b">GLM-4 9B (MoE)</option>
	        <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
	        <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
	        <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
	      </optgroup>
	    </select>
	  </div>

	  <div class="form-grid">
	    <div class="form-group">
	      <label for="inference-model-name">Model Name:</label>
	      <input id="inference-model-name" type="text" value="custom-model">
	    </div>
	    <div class="form-group">
	      <label for="inference-num-params">Parameters:</label>
	      <input id="inference-num-params" type="text" value="7B" placeholder="e.g., 7B">
	    </div>
	    <div class="form-group">
	      <label for="inference-num-layers">Layers:</label>
	      <input id="inference-num-layers" type="number" value="32" min="1">
	    </div>
	    <div class="form-group">
	      <label for="inference-hidden-size">Hidden Size:</label>
	      <input id="inference-hidden-size" type="number" value="4096" min="1">
	    </div>
	    <div class="form-group">
	      <label for="inference-num-heads">Attention Heads:</label>
	      <input id="inference-num-heads" type="number" value="32" min="1">
	    </div>
	    <div class="form-group">
	      <label for="inference-vocab-size">Vocab Size:</label>
	      <input id="inference-vocab-size" type="number" value="32000" min="1">
	    </div>
	    <div class="form-group">
	      <label for="inference-seq-len">Max Seq Length:</label>
	      <input id="inference-seq-len" type="number" value="4096" min="1">
	    </div>
	  </div>
	</section>

	<!-- Inference Settings: engine, batch size, KV-cache quantization,
	     tensor parallelism, memory utilization target and hardware. -->
	<section class="config-section">
	  <h3>Inference Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="inference-engine" data-tooltip="Inference engine to use">Inference Engine:</label>
	      <select id="inference-engine">
	        <option value="huggingface">HuggingFace Transformers</option>
	        <option value="vllm" selected>vLLM (Recommended)</option>
	        <option value="tgi">TGI (HuggingFace TGI)</option>
	        <option value="tensorrt_llm">TensorRT-LLM</option>
	        <option value="sglang">SGLang</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="inference-batch-size">Batch Size:</label>
	      <input id="inference-batch-size" type="number" value="32" min="1">
	    </div>
	    <div class="form-group">
	      <label for="kv-cache-quantization" data-tooltip="KV cache quantization type">KV Cache Quantization:</label>
	      <!-- Ratios are relative to the 16-bit baseline named in the first
	           option: 8-bit formats (INT8, FP8) halve it and 4-bit quarters it.
	           The labels previously read 4x for FP8 and 8x for INT4.
	           NOTE(review): only the label text is corrected here; confirm the
	           backend calculation uses the same ratios. -->
	      <select id="kv-cache-quantization">
	        <option value="none" selected>NONE (FP16)</option>
	        <option value="int8">INT8 (2x compression)</option>
	        <option value="fp8">FP8 (2x compression)</option>
	        <option value="int4">INT4 (4x compression)</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="tensor-parallel-size" data-tooltip="Number of GPUs for tensor parallelism">Tensor Parallel Size:</label>
	      <input id="tensor-parallel-size" type="number" value="1" min="1" max="8">
	    </div>
	    <div class="form-group">
	      <label for="gpu-memory-util" data-tooltip="GPU memory utilization (0.0-1.0)">GPU Memory Utilization:</label>
	      <input id="gpu-memory-util" type="range" min="0.5" max="0.95" step="0.05" value="0.9">
	      <!-- Text readout starting at the slider's default value. -->
	      <span id="gpu-memory-util-value">0.90</span>
	    </div>
	    <div class="form-group">
	      <label for="inference-gpu-model">GPU Model:</label>
	      <select id="inference-gpu-model">
	        <option value="24">RTX 4090 - 24GB</option>
	        <option value="32">V100 - 32GB</option>
	        <option value="40">A100 - 40GB</option>
	        <option value="80" selected>A100 - 80GB / H100 - 80GB</option>
	        <option value="141">H200 - 141GB</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="inference-num-gpus">Number of GPUs:</label>
	      <input id="inference-num-gpus" type="number" value="1" min="1">
	    </div>
	    <div class="form-group">
	      <label for="use-kv-cache">Enable KV Cache:</label>
	      <input id="use-kv-cache" type="checkbox" checked>
	    </div>
	  </div>
	</section>

	<!-- TGI-specific settings. The section starts hidden (inline
	     display:none); presumably a script shows it when TGI is the selected
	     engine (not visible in this chunk). -->
	<section class="config-section" id="tgi-settings" style="display:none;">
	  <h3>TGI-Specific Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="max-total-tokens" data-tooltip="Most important: defines memory budget (input + output)">Max Total Tokens:</label>
	      <input id="max-total-tokens" type="number" value="4096" min="1" placeholder="e.g., 4096">
	    </div>
	    <div class="form-group">
	      <label for="max-input-tokens">Max Input Tokens:</label>
	      <input id="max-input-tokens" type="number" value="2048" min="1" placeholder="e.g., 2048">
	    </div>
	    <div class="form-group">
	      <label for="max-batch-total-tokens">Max Batch Total Tokens:</label>
	      <input id="max-batch-total-tokens" type="number" value="8192" min="1" placeholder="e.g., 8192">
	    </div>
	    <div class="form-group">
	      <label for="tgi-quantize">Weight Quantization:</label>
	      <select id="tgi-quantize">
	        <option value="none" selected>NONE</option>
	        <option value="awq">AWQ</option>
	        <option value="eetq">EETQ</option>
	        <option value="exl2">EXL2</option>
	        <option value="gptq">GPTQ</option>
	        <option value="marlin">Marlin</option>
	        <option value="bitsandbytes">BitsAndBytes (8-bit)</option>
	        <option value="bitsandbytes-nf4">BitsAndBytes NF4</option>
	        <option value="bitsandbytes-fp4">BitsAndBytes FP4</option>
	        <option value="fp8">FP8</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="tgi-dtype">Data Type:</label>
	      <select id="tgi-dtype">
	        <option value="float16">Float16</option>
	        <option value="bfloat16" selected>BFloat16</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="sharded">Enable Sharded:</label>
	      <input id="sharded" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="num-shard">Number of Shards:</label>
	      <input id="num-shard" type="number" value="1" min="1" placeholder="Auto if empty">
	    </div>
	  </div>
	</section>

	<!-- vLLM-specific settings. The section starts hidden (inline
	     display:none) even though vLLM is the default engine; presumably a
	     script reveals it on load (TODO confirm). -->
	<section class="config-section" id="vllm-settings" style="display:none;">
	  <h3>vLLM-Specific Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="block-size" data-tooltip="Block size for paged KV cache (default: 16)">Block Size:</label>
	      <select id="block-size">
	        <option value="">Auto (16)</option>
	        <option value="1">1</option>
	        <option value="8">8</option>
	        <option value="16" selected>16</option>
	        <option value="32">32</option>
	        <option value="64">64</option>
	        <option value="128">128</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="swap-space-gb">CPU Swap Space (GB):</label>
	      <input id="swap-space-gb" type="number" value="0" min="0" step="0.1">
	    </div>
	    <div class="form-group">
	      <label for="enable-prefix-caching">Enable Prefix Caching:</label>
	      <input id="enable-prefix-caching" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="enforce-eager">Enable Eager Mode:</label>
	      <input id="enforce-eager" type="checkbox">
	    </div>
	    <!-- The next two inputs have no default value; their placeholder reads "Auto if empty". -->
	    <div class="form-group">
	      <label for="max-num-batched-tokens">Max Batched Tokens:</label>
	      <input id="max-num-batched-tokens" type="number" placeholder="Auto if empty">
	    </div>
	    <div class="form-group">
	      <label for="max-num-seqs">Max Sequences per Batch:</label>
	      <input id="max-num-seqs" type="number" placeholder="Auto if empty">
	    </div>
	    <div class="form-group">
	      <label for="vllm-quantization">Weight Quantization:</label>
	      <select id="vllm-quantization">
	        <option value="none" selected>NONE</option>
	        <option value="awq">AWQ</option>
	        <option value="gptq">GPTQ</option>
	        <option value="squeezellm">SqueezeLLM</option>
	        <option value="fp8">FP8</option>
	      </select>
	    </div>
	  </div>
	</section>

	<!-- TensorRT-LLM-specific settings: four numeric limits. The section starts hidden (inline display:none). -->
	<section class="config-section" id="tensorrt-settings" style="display:none;">
	  <h3>TensorRT-LLM-Specific Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="trt-max-batch-size">Max Batch Size:</label>
	      <input id="trt-max-batch-size" type="number" value="2048" min="1">
	    </div>
	    <div class="form-group">
	      <label for="trt-max-input-len">Max Input Length:</label>
	      <input id="trt-max-input-len" type="number" value="1024" min="1">
	    </div>
	    <div class="form-group">
	      <label for="trt-max-seq-len">Max Sequence Length:</label>
	      <input id="trt-max-seq-len" type="number" value="2048" min="1">
	    </div>
	    <div class="form-group">
	      <label for="trt-max-beam-width">Max Beam Width:</label>
	      <input id="trt-max-beam-width" type="number" value="1" min="1">
	    </div>
	  </div>
	</section>

	<!-- SGLang-specific settings: numeric limits, two selectors, then a run
	     of boolean toggles. The section starts hidden (inline display:none). -->
	<section class="config-section" id="sglang-settings" style="display:none;">
	  <h3>SGLang-Specific Settings</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="chunk-size" data-tooltip="Prefill chunk size for long contexts (default: 8192)">Chunk Size:</label>
	      <input id="chunk-size" type="number" value="8192" min="1" placeholder="e.g., 8192">
	    </div>
	    <div class="form-group">
	      <label for="max-running-requests" data-tooltip="Maximum number of concurrent requests">Max Running Requests:</label>
	      <input id="max-running-requests" type="number" value="128" min="1" placeholder="e.g., 128">
	    </div>
	    <div class="form-group">
	      <label for="radix-cache-max-seq-len" data-tooltip="Maximum sequence length for RadixCache">RadixCache Max Seq Len:</label>
	      <input id="radix-cache-max-seq-len" type="number" value="8192" min="1" placeholder="e.g., 8192">
	    </div>
	    <div class="form-group">
	      <label for="attention-backend" data-tooltip="Attention backend implementation">Attention Backend:</label>
	      <select id="attention-backend">
	        <option value="flashinfer" selected>FlashInfer</option>
	        <option value="triton">Triton</option>
	        <option value="torch">Torch</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="speculative-algo" data-tooltip="Speculative decoding algorithm">Speculative Algorithm:</label>
	      <select id="speculative-algo">
	        <option value="default" selected>Default</option>
	        <option value="medusa">Medusa</option>
	        <option value="eagle">EAGLE</option>
	      </select>
	    </div>
	    <!-- Boolean toggles; all start unchecked. -->
	    <div class="form-group">
	      <label for="disable-radix-cache">Disable RadixCache:</label>
	      <input id="disable-radix-cache" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="enable-p2p">Enable P2P Attention:</label>
	      <input id="enable-p2p" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="disable-custom-all-reduce">Disable Custom All-Reduce:</label>
	      <input id="disable-custom-all-reduce" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="enable-torch-compile">Enable torch.compile:</label>
	      <input id="enable-torch-compile" type="checkbox">
	    </div>
	    <div class="form-group">
	      <label for="multi-lora-enabled">Enable Multi-LoRA:</label>
	      <input id="multi-lora-enabled" type="checkbox">
	    </div>
	  </div>
	</section>

	<!-- Inference tab actions. type="button" is explicit, consistent with the
	     HF fetch buttons, so neither control can act as a submit button. -->
	<div class="button-group">
	  <button id="inference-calculate-btn" class="btn-primary" type="button">Calculate Inference Memory</button>
	  <button id="inference-reset-btn" class="btn-secondary" type="button">Reset</button>
	</div>
	</div>

	<!-- Inference Results Panel -->
	<div class="results-panel">
	<h2>Inference Results</h2>

	<div class="result-card">
	  <!-- Inference memory figures. Every value starts as the placeholder text "-- GB". -->
	  <h3>Memory Breakdown</h3>
	  <div class="metric">
	    <span class="metric-label">Per GPU:</span>
	    <span id="inference-result-per-gpu" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Total All GPUs:</span>
	    <span id="inference-result-total" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Model Parameters:</span>
	    <span id="inference-result-params" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">KV Cache:</span>
	    <span id="inference-result-kv-cache" class="metric-value">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Activations:</span>
	    <span id="inference-result-activations" class="metric-value">-- GB</span>
	  </div>
	</div>

	<div class="result-card">
	  <!-- Capacity and throughput estimates. All values start as placeholders. -->
	  <h3>Performance Estimates</h3>
	  <div class="metric">
	    <span class="metric-label">Max Batch Size:</span>
	    <span id="inference-max-batch" class="metric-value">--</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Estimated Throughput:</span>
	    <span id="inference-throughput" class="metric-value">-- tokens/sec</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Fits on GPU:</span>
	    <span id="inference-fits" class="metric-value">--</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Utilization:</span>
	    <span id="inference-utilization" class="metric-value">--%</span>
	  </div>
	</div>
	</div>
	</div><!-- End Inference Tab -->

	<!-- Multi-Node Tab -->
	<div id="multinode-tab" class="tab-content" style="display:none;">
	<div class="config-panel">
	<h2>Multi-Node Training Configuration</h2>
	<p class="info-text">Calculate network communication overhead for distributed training across multiple nodes.</p>

	<!-- Model Settings (multi-node tab): preset picker plus only the two
	     fields this tab exposes, parameter count and precision. -->
	<section class="config-section">
	  <h3>Model Settings</h3>
	  <div class="form-group">
	    <label for="multinode-preset-select">Preset Model:</label>
	    <select id="multinode-preset-select">
	      <option value="custom">Custom</option>
	      <optgroup label="Dense Models">
	        <option value="llama2-7b">LLaMA 2 7B</option>
	        <option value="llama2-13b">LLaMA 2 13B</option>
	        <option value="llama2-70b">LLaMA 2 70B</option>
	        <option value="gpt3-175b">GPT-3 175B</option>
	      </optgroup>
	      <optgroup label="MoE (Mixture of Experts) Models">
	        <option value="glm-4.7-355b">GLM-4.7 355B (MoE) ⭐ Latest</option>
	        <option value="glm-4.5-air-106b">GLM-4.5 Air 106B (MoE) ⭐ Air</option>
	        <option value="glm-4-9b">GLM-4 9B (MoE)</option>
	        <option value="mixtral-8x7b">Mixtral 8x7B (MoE)</option>
	        <option value="qwen1.5-moe-a2.7b">Qwen1.5-MoE-A2.7B</option>
	        <option value="deepseek-moe-16b">DeepSeek-MoE 16B</option>
	      </optgroup>
	    </select>
	  </div>

	  <div class="form-grid">
	    <div class="form-group">
	      <label for="multinode-num-params">Parameters:</label>
	      <input id="multinode-num-params" type="text" value="7B" placeholder="e.g., 7B">
	    </div>
	    <div class="form-group">
	      <label for="multinode-dtype">Precision:</label>
	      <select id="multinode-dtype">
	        <option value="bf16" selected>BF16</option>
	        <option value="fp16">FP16</option>
	        <option value="fp32">FP32</option>
	      </select>
	    </div>
	  </div>
	</section>

	<!-- Cluster shape: node count, GPUs per node and the interconnect type.
	     The Total GPUs figure is static here (16 matches the 2 x 8 defaults);
	     presumably the page script recomputes it when the inputs change. -->
	<section class="config-section">
	  <h3>Node Configuration</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="num-nodes">Number of Nodes:</label>
	      <input id="num-nodes" type="number" value="2" min="1">
	    </div>
	    <div class="form-group">
	      <label for="gpus-per-node">GPUs per Node:</label>
	      <input id="gpus-per-node" type="number" value="8" min="1">
	    </div>
	    <div class="form-group">
	      <label data-tooltip="Network interconnect between nodes" for="interconnect-type">Interconnect Type:</label>
	      <select id="interconnect-type">
	        <option value="infiniband" selected>InfiniBand (200 Gbps)</option>
	        <option value="nvlink">NVLink (900 Gbps)</option>
	        <option value="ethernet_200g">Ethernet 200G</option>
	        <option value="ethernet_100g">Ethernet 100G</option>
	        <option value="ethernet_25g">Ethernet 25G</option>
	        <option value="ethernet_10g">Ethernet 10G</option>
	      </select>
	    </div>
	  </div>
	  <p class="info-text">Total GPUs: <span id="multinode-total-gpus">16</span></p>
	</section>

	<!-- Training setup for the multi-node estimate: engine (DeepSpeed ZeRO by
	     default), ZeRO stage (3 by default), per-GPU batch size and sequence
	     length. The ZeRO stage group has its own id, presumably so the script
	     can show or hide it depending on the engine (confirm in app.js). -->
	<section class="config-section">
	  <h3>Training Configuration</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="multinode-engine">Training Engine:</label>
	      <select id="multinode-engine">
	        <option value="pytorch_ddp">PyTorch DDP</option>
	        <option value="deepspeed" selected>DeepSpeed ZeRO</option>
	        <option value="megatron_lm">Megatron-LM</option>
	        <option value="fsdp">PyTorch FSDP</option>
	      </select>
	    </div>
	    <div class="form-group" id="multinode-zero-stage-group">
	      <label for="multinode-zero-stage">ZeRO Stage:</label>
	      <select id="multinode-zero-stage">
	        <option value="1">1: Shard optimizer states</option>
	        <option value="2">2: Shard optimizer + gradients</option>
	        <option value="3" selected>3: Shard everything</option>
	      </select>
	    </div>
	    <div class="form-group">
	      <label for="multinode-batch-size">Batch Size per GPU:</label>
	      <input id="multinode-batch-size" type="number" value="4" min="1">
	    </div>
	    <div class="form-group">
	      <label for="multinode-seq-len">Sequence Length:</label>
	      <input id="multinode-seq-len" type="number" value="4096" min="1">
	    </div>
	  </div>
	</section>

	<!-- Parallelism strategy: tensor and pipeline parallel degrees (both
	     default to 1), an opt-in sequence parallel checkbox, and an
	     auto-optimize checkbox that is ticked by default. -->
	<section class="config-section">
	  <h3>Parallelism Strategy</h3>
	  <div class="form-grid">
	    <div class="form-group">
	      <label for="multinode-tensor-pp">Tensor Parallel:</label>
	      <input id="multinode-tensor-pp" type="number" value="1" min="1">
	    </div>
	    <div class="form-group">
	      <label for="multinode-pipeline-pp">Pipeline Parallel:</label>
	      <input id="multinode-pipeline-pp" type="number" value="1" min="1">
	    </div>
	    <div class="form-group">
	      <label for="multinode-seq-parallel">
	        <input id="multinode-seq-parallel" type="checkbox">
	        Enable Sequence Parallel
	      </label>
	    </div>
	    <div class="form-group">
	      <label for="multinode-optimize">
	        <input id="multinode-optimize" type="checkbox" checked>
	        Auto-optimize Strategy
	      </label>
	    </div>
	  </div>
	</section>

	<!-- Calculate and Reset actions for the multi-node tab. The type is set
	     explicitly because a bare button defaults to type=submit, which would
	     submit (and reload) if this panel is ever placed inside a form. -->
	<div class="button-group">
	  <button class="btn-primary" id="multinode-calculate-btn" type="button">Calculate Network Overhead</button>
	  <button class="btn-secondary" id="multinode-reset-btn" type="button">Reset</button>
	</div>
	</div>

	<!-- Multi-Node Results Panel -->
	<div class="results-panel">
	<h2>Multi-Node Results</h2>

	<!-- Network overhead card: a total followed by one row per communication
	     kind (AllReduce, AllGather, ReduceScatter, pipeline). The static
	     markup only carries placeholders for each value. -->
	<div class="result-card">
	  <h3>Network Overhead</h3>
	  <div class="metric">
	    <span class="metric-label">Total Overhead:</span>
	    <span class="metric-value" id="multinode-overhead-total">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">AllReduce:</span>
	    <span class="metric-value" id="multinode-overhead-allreduce">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">AllGather:</span>
	    <span class="metric-value" id="multinode-overhead-allgather">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">ReduceScatter:</span>
	    <span class="metric-value" id="multinode-overhead-reducescatter">-- GB</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Pipeline Comm:</span>
	    <span class="metric-value" id="multinode-overhead-pipeline">-- GB</span>
	  </div>
	</div>

	<!-- Time impact card: estimated overhead and communication time per
	     training step, plus a latency figure, all in milliseconds. -->
	<div class="result-card">
	  <h3>Time Impact</h3>
	  <div class="metric">
	    <span class="metric-label">Est. Overhead:</span>
	    <span class="metric-value" id="multinode-time-overhead">-- ms/step</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Communication Time:</span>
	    <span class="metric-value" id="multinode-comm-time">-- ms/step</span>
	  </div>
	  <div class="metric">
	    <span class="metric-label">Latency Impact:</span>
	    <span class="metric-value" id="multinode-latency">-- ms</span>
	  </div>
	</div>

	<!-- Optimization suggestions card. The inner container starts with a
	     prompt to run the calculation; its content is presumably replaced by
	     the page script afterwards (confirm in app.js). aria-live=polite lets
	     assistive technology announce the new suggestions without stealing
	     focus; the region is present in the DOM before any update happens. -->
	<div class="result-card">
	  <h3>Optimization Suggestions</h3>
	  <div id="multinode-suggestions" aria-live="polite">
	    <p>Run calculation to see optimization suggestions.</p>
	  </div>
	</div>
	</div>
	</div><!-- End Multi-Node Tab -->

	</div>

	<!-- Page-level error banner: empty and hidden in the static markup,
	     presumably filled and revealed by the page script when a request
	     fails (confirm in app.js). role=alert makes assistive technology
	     announce the message as soon as it appears; the element already
	     exists in the DOM before any text is injected. -->
	<div class="error-message" id="error-message" role="alert" style="display:none"></div>
	</div>

	<script src="/static/js/app.js"></script>
	</body>
	</html>