VRAM-Calculator / index.html
Boof2015's picture
Update index.html
ef4950b verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>VRAM Estimator | NovaAI</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
:root {
--bg-body: #0f0f0f;
--bg-card: #141414;
--bg-card-hover: #1a1a1a;
--bg-input: #0a0a0a;
--border-color: #27272a;
--border-focus: #3b82f6;
--text-main: #e4e4e7;
--text-muted: #a1a1aa;
--text-dim: #52525b;
--accent: #3b82f6;
--accent-glow: rgba(59, 130, 246, 0.15);
--color-weights: #3b82f6;
--color-cache: #8b5cf6;
--color-overhead: #71717a;
--color-overload: #ef4444;
--success: #10b981;
--warning: #f59e0b;
--danger: #ef4444;
}
/* Prevent body conflicts with WordPress theme */
body.page .vram-calculator-wrapper {
display: block !important;
}
/* Calculator wrapper - contains all styles */
.vram-calculator-wrapper {
background-color: var(--bg-body);
color: var(--text-main);
font-family: 'Inter', sans-serif;
font-size: 14px;
line-height: 1.5;
min-height: 100vh;
}
.vram-calculator-wrapper * {
margin: 0;
padding: 0;
box-sizing: border-box;
}
.vram-calculator-wrapper .app-container {
max-width: 1400px;
margin: 0 auto;
padding: 2rem;
width: 100%;
flex: 1;
display: flex;
flex-direction: column;
gap: 2rem;
}
.vram-calculator-wrapper header {
display: flex;
justify-content: space-between;
align-items: center;
padding-bottom: 1.5rem;
border-bottom: 1px solid var(--border-color);
}
.vram-calculator-wrapper h1 { font-size: 1.25rem; font-weight: 700; letter-spacing: -0.025em; }
.vram-calculator-wrapper .brand { color: var(--accent); }
.vram-calculator-wrapper .dashboard {
display: grid;
grid-template-columns: 1fr 400px;
gap: 2rem;
align-items: start;
}
@media (max-width: 1024px) {
.vram-calculator-wrapper .dashboard { grid-template-columns: 1fr; }
}
/* --- Cards --- */
.vram-calculator-wrapper .card {
background: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 12px;
transition: border-color 0.2s;
position: relative;
}
.vram-calculator-wrapper .card-header {
padding: 1rem 1.5rem;
border-bottom: 1px solid var(--border-color);
background: rgba(255,255,255,0.02);
font-weight: 600;
display: flex;
align-items: center;
gap: 0.5rem;
color: var(--text-main);
border-radius: 12px 12px 0 0;
}
.vram-calculator-wrapper .card-body { padding: 1.5rem; }
/* --- Inputs --- */
.vram-calculator-wrapper .input-group { margin-bottom: 1.25rem; position: relative; }
.vram-calculator-wrapper .input-group:last-child { margin-bottom: 0; }
.vram-calculator-wrapper .input-group label {
display: block;
color: var(--text-muted);
font-size: 0.85rem;
margin-bottom: 0.5rem;
font-weight: 500;
}
.vram-calculator-wrapper input[type="text"],
.vram-calculator-wrapper input[type="number"],
.vram-calculator-wrapper input[type="password"],
.vram-calculator-wrapper select {
width: 100%;
background: var(--bg-input) !important;
border: 1px solid var(--border-color) !important;
color: var(--text-main) !important;
padding: 0 1rem !important;
height: 42px !important;
border-radius: 6px !important;
font-family: inherit !important;
font-size: 0.9rem !important;
transition: all 0.2s ease !important;
-webkit-appearance: none !important;
-moz-appearance: none !important;
appearance: none !important;
background-image: none !important;
}
/* Custom dropdown arrow for selects */
.vram-calculator-wrapper select {
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a1a1aa' d='M6 9L1 4h10z'/%3E%3C/svg%3E") !important;
background-repeat: no-repeat !important;
background-position: right 1rem center !important;
padding-right: 2.5rem !important;
}
/* Remove any WordPress/theme pseudo-elements */
.vram-calculator-wrapper select::after,
.vram-calculator-wrapper select::before {
display: none !important;
content: none !important;
}
.vram-calculator-wrapper input:focus,
.vram-calculator-wrapper select:focus {
outline: none !important;
border-color: var(--border-focus) !important;
box-shadow: 0 0 0 2px var(--accent-glow) !important;
}
.vram-calculator-wrapper .form-row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1.5rem;
margin-top: 1.25rem;
}
@media (max-width: 700px) {
.vram-calculator-wrapper .form-row { grid-template-columns: 1fr; gap: 1rem; }
}
.vram-calculator-wrapper .form-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1rem;
}
.vram-calculator-wrapper .btn-primary {
background: var(--accent);
color: white;
border: none;
padding: 0 1.5rem;
height: 42px;
border-radius: 6px;
font-weight: 600;
cursor: pointer;
transition: opacity 0.2s;
white-space: nowrap;
display: inline-flex;
align-items: center;
justify-content: center;
}
.vram-calculator-wrapper .btn-primary:hover { opacity: 0.9; }
/* --- Visualization --- */
.vram-calculator-wrapper .viz-container { margin-top: 1.5rem; }
.vram-calculator-wrapper .viz-bar-wrapper { height: 32px; background: var(--bg-input); border-radius: 6px; display: flex; overflow: hidden; position: relative; }
.vram-calculator-wrapper .viz-segment { height: 100%; transition: width 0.4s ease; }
.vram-calculator-wrapper .seg-model { background: var(--color-weights); }
.vram-calculator-wrapper .seg-kv { background: var(--color-cache); }
.vram-calculator-wrapper .seg-sys { background: var(--color-overhead); }
.vram-calculator-wrapper .seg-over { background: repeating-linear-gradient(45deg, var(--color-overload), var(--color-overload) 10px, #b91c1c 10px, #b91c1c 20px); }
.vram-calculator-wrapper .legend { display: flex; gap: 1.25rem; margin-top: 1rem; flex-wrap: wrap; }
.vram-calculator-wrapper .legend-item { display: flex; align-items: center; gap: 0.4rem; font-size: 0.8rem; color: var(--text-muted); }
.vram-calculator-wrapper .dot { width: 10px; height: 10px; border-radius: 3px; }
.vram-calculator-wrapper .dot.seg-model { background: var(--color-weights); }
.vram-calculator-wrapper .dot.seg-kv { background: var(--color-cache); }
.vram-calculator-wrapper .dot.seg-sys { background: var(--color-overhead); }
.vram-calculator-wrapper .dot.seg-over { background: var(--color-overload); }
.vram-calculator-wrapper .limit-line { position: absolute; top: -4px; bottom: -4px; width: 3px; background: var(--text-main); border-radius: 2px; z-index: 10; display: none; box-shadow: 0 0 8px rgba(255,255,255,0.5); }
/* Recommendations */
.vram-calculator-wrapper .recs-box { margin-top: 1.5rem; padding: 1rem; border-radius: 8px; background: rgba(16, 185, 129, 0.1); border: 1px solid rgba(16, 185, 129, 0.2); }
.vram-calculator-wrapper .recs-box.warning { background: rgba(245, 158, 11, 0.1); border-color: rgba(245, 158, 11, 0.3); }
.vram-calculator-wrapper .recs-box.danger { background: rgba(239, 68, 68, 0.1); border-color: rgba(239, 68, 68, 0.3); }
.vram-calculator-wrapper .recs-title { font-weight: 600; margin-bottom: 0.75rem; }
.vram-calculator-wrapper .rec-step { display: flex; align-items: center; gap: 0.5rem; margin: 0.5rem 0; font-size: 0.9rem; }
.vram-calculator-wrapper .rec-tag { font-size: 0.7rem; padding: 2px 6px; border-radius: 4px; font-weight: 600; text-transform: uppercase; }
.vram-calculator-wrapper .tag-quant { background: rgba(59, 130, 246, 0.2); color: #60a5fa; }
.vram-calculator-wrapper .tag-ctx { background: rgba(139, 92, 246, 0.2); color: #a78bfa; }
.vram-calculator-wrapper .tag-cache { background: rgba(16, 185, 129, 0.2); color: #34d399; }
.vram-calculator-wrapper .rec-solution { font-weight: 500; margin-bottom: 0.5rem; color: var(--text-main); }
/* Specs Grid */
.vram-calculator-wrapper .specs-grid { display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem; }
.vram-calculator-wrapper .spec-item { display: flex; flex-direction: column; gap: 0.25rem; }
.vram-calculator-wrapper .spec-label { font-size: 0.75rem; color: var(--text-dim); text-transform: uppercase; }
.vram-calculator-wrapper .spec-val { font-family: 'JetBrains Mono', monospace; font-size: 1rem; color: var(--text-main); }
/* Searchable dropdown */
.vram-calculator-wrapper .dropdown-container { position: relative; }
.vram-calculator-wrapper .dropdown-results { position: absolute; top: 100%; left: 0; right: 0; background: var(--bg-card); border: 1px solid var(--border-color); border-radius: 6px; max-height: 200px; overflow-y: auto; z-index: 50; display: none; margin-top: 4px; }
.vram-calculator-wrapper .dropdown-results.active { display: block; }
.vram-calculator-wrapper .dropdown-item { padding: 0.75rem 1rem; cursor: pointer; display: flex; justify-content: space-between; align-items: center; }
.vram-calculator-wrapper .dropdown-item:hover { background: var(--bg-card-hover); }
.vram-calculator-wrapper .dropdown-meta { font-size: 0.8rem; color: var(--text-dim); font-family: 'JetBrains Mono', monospace; }
/* GPU Grid */
.vram-calculator-wrapper .gpu-section { margin-top: 2rem; }
.vram-calculator-wrapper .gpu-section-title {
font-size: 1rem;
font-weight: 600;
margin-bottom: 1rem;
color: var(--text-main);
display: flex;
justify-content: space-between;
align-items: center;
gap: 1rem;
flex-wrap: wrap;
}
.vram-calculator-wrapper .gpu-section-title select {
max-width: 200px;
min-width: 150px;
flex-shrink: 0;
}
@media (max-width: 600px) {
.vram-calculator-wrapper .gpu-section-title {
flex-direction: column;
align-items: flex-start;
}
.vram-calculator-wrapper .gpu-section-title select {
width: 100%;
max-width: none;
}
}
.vram-calculator-wrapper .gpu-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 1rem; }
.vram-calculator-wrapper .gpu-card { background: var(--bg-card); border: 1px solid var(--border-color); padding: 1.25rem; border-radius: 10px; transition: all 0.2s; }
.vram-calculator-wrapper .gpu-card:hover { transform: translateY(-2px); border-color: var(--border-focus); }
.vram-calculator-wrapper .gpu-top { display: flex; justify-content: space-between; margin-bottom: 1rem; }
.vram-calculator-wrapper .gpu-name { font-weight: 600; font-size: 0.95rem; color: var(--text-main); }
.vram-calculator-wrapper .gpu-vram { font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; color: var(--text-muted); background: rgba(255,255,255,0.05); padding: 2px 6px; border-radius: 4px; }
.vram-calculator-wrapper .gpu-bar-bg { height: 6px; background: var(--bg-input); border-radius: 3px; overflow: hidden; margin-bottom: 0.75rem; }
.vram-calculator-wrapper .gpu-bar-fill { height: 100%; border-radius: 3px; transition: width 0.3s ease; }
.vram-calculator-wrapper .hidden { display: none !important; }
.vram-calculator-wrapper .sticky-panel { position: sticky; top: 2rem; }
.vram-calculator-wrapper .checkbox-group { display: flex; align-items: center; gap: 0.5rem; cursor: pointer; user-select: none; padding: 0.75rem; border: 1px solid var(--border-color); border-radius: 6px; background: var(--bg-input); }
.vram-calculator-wrapper .spinner { width: 16px; height: 16px; border: 2px solid rgba(255,255,255,0.3); border-radius: 50%; border-top-color: #fff; animation: spin 0.8s linear infinite; }
@keyframes spin { to { transform: rotate(360deg); } }
/* Scrollbar theming via the vendor-prefixed ::-webkit-scrollbar pseudo-elements.
   NOTE(review): unlike the surrounding rules, these selectors carry no
   .vram-calculator-wrapper prefix, so they apply to every scrollbar on the page,
   not only to scrollbars inside the calculator. Confirm that is intended if this
   markup is embedded in a host page. */
::-webkit-scrollbar { width: 8px; }
::-webkit-scrollbar-track { background: var(--bg-body); }
::-webkit-scrollbar-thumb { background: var(--border-color); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: var(--text-dim); }
.vram-calculator-wrapper .mobile-footer { display: none; position: fixed; bottom: 0; left: 0; right: 0; background: var(--bg-card); border-top: 1px solid var(--border-color); padding: 1rem; z-index: 100; box-shadow: 0 -4px 20px rgba(0,0,0,0.5); }
@media (max-width: 768px) {
.vram-calculator-wrapper .mobile-footer { display: flex; justify-content: space-between; align-items: center; }
.vram-calculator-wrapper .app-container { padding: 1rem; padding-bottom: 80px; }
.vram-calculator-wrapper .gpu-grid { grid-template-columns: 1fr; }
}
/* Lock Toggle Styles */
.vram-calculator-wrapper .lockable-input {
display: flex;
gap: 0.5rem;
align-items: stretch;
}
.vram-calculator-wrapper .lockable-input select,
.vram-calculator-wrapper .lockable-input input {
flex: 1;
min-width: 0;
}
.vram-calculator-wrapper .lock-btn {
width: 42px;
height: 42px;
display: flex;
align-items: center;
justify-content: center;
background: var(--bg-input);
border: 1px solid var(--border-color);
border-radius: 6px;
cursor: pointer;
transition: all 0.2s;
color: var(--text-dim);
flex-shrink: 0;
}
.vram-calculator-wrapper .lock-btn:hover {
border-color: var(--text-muted);
color: var(--text-muted);
}
.vram-calculator-wrapper .lock-btn.locked {
background: rgba(59, 130, 246, 0.15);
border-color: var(--accent);
color: var(--accent);
}
.vram-calculator-wrapper .lock-btn svg {
width: 16px;
height: 16px;
}
/* Optimal badge */
.vram-calculator-wrapper .optimal-badge {
display: inline-flex;
align-items: center;
gap: 0.25rem;
font-size: 0.7rem;
padding: 2px 6px;
border-radius: 4px;
background: rgba(16, 185, 129, 0.15);
color: var(--success);
font-weight: 500;
margin-left: 0.5rem;
}
/* Label with badge container */
.vram-calculator-wrapper .label-row {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 0.5rem;
}
.vram-calculator-wrapper .label-row label {
margin-bottom: 0;
}
</style>
</head>
<body>
<div class="vram-calculator-wrapper">
<div class="app-container">
<header>
  <h1>LLM VRAM <span class="brand">Calculator</span></h1>
  <span id="header-custom-text" style="color: var(--text-muted); font-size: 0.85rem;">HF version of the calculator found at <a href="https://novaml.ai/vram/" style="color: inherit;">https://novaml.ai/vram/</a></span>
  <!-- Reset is an in-page action (it reloads the page), so it is a button rather than a placeholder link. -->
  <button type="button" onclick="window.location.reload()" style="background: none; border: none; padding: 0; cursor: pointer; color: var(--text-muted); font-family: inherit; font-size: 0.9rem;">Reset</button>
</header>
<!-- Main Loader Card -->
<div class="card">
  <div class="card-body">
    <div class="input-group">
      <!-- Each label is tied to its control through for/id so clicking it focuses the field
           and assistive technology announces the field name. -->
      <label for="model-path">HuggingFace Model Path</label>
      <div style="display: flex; gap: 1rem;">
        <input type="text" id="model-path" placeholder="e.g. meta-llama/Llama-3.3-70B-Instruct">
        <button type="button" id="load-btn" class="btn-primary">Load Model</button>
      </div>
    </div>
    <div class="form-row">
      <div class="input-group">
        <label for="hf-token">HF Token (Optional)</label>
        <input type="password" id="hf-token" placeholder="hf_..." autocomplete="off">
      </div>
      <div class="input-group">
        <label for="selected-gpu-input">Your Hardware (Optional)</label>
        <div class="dropdown-container">
          <input type="text" id="selected-gpu-input" placeholder="Search GPU (e.g. 4090)" autocomplete="off">
          <div class="dropdown-results" id="gpu-dropdown"></div>
        </div>
      </div>
    </div>
    <!-- Present in the DOM from the start so that error text written into it later is announced. -->
    <div id="error-msg" role="alert" style="color: var(--danger); font-size: 0.85rem; margin-top: 0.5rem;"></div>
  </div>
</div>
<div class="dashboard">
<!-- Left: Settings -->
<div class="settings-column">
<div class="card hidden" id="model-specs" style="margin-bottom: 2rem; border-left: 3px solid var(--accent);">
  <div class="card-header">
    <span>Model Specifications</span>
    <span id="spec-arch-badge" style="font-size:0.75rem; background:rgba(255,255,255,0.1); padding:2px 8px; border-radius:4px;">-</span>
  </div>
  <div class="card-body">
    <!-- Name/value pairs are marked up as a description list; the class names and ids
         used by the stylesheet and the script are unchanged. -->
    <dl class="specs-grid">
      <div class="spec-item"><dt class="spec-label">Parameters</dt><dd class="spec-val" id="spec-params">-</dd></div>
      <div class="spec-item"><dt class="spec-label">Hidden Size</dt><dd class="spec-val" id="spec-hidden">-</dd></div>
      <div class="spec-item"><dt class="spec-label">Layers</dt><dd class="spec-val" id="spec-layers">-</dd></div>
      <div class="spec-item"><dt class="spec-label">Attn Heads</dt><dd class="spec-val" id="spec-heads">-</dd></div>
    </dl>
  </div>
</div>
<div class="card" id="config-panel" style="opacity: 0.5; pointer-events: none;">
  <div class="card-header">
    <span>Inference Configuration</span>
    <span id="lock-hint" class="hidden" style="font-size: 0.75rem; font-weight: 400; color: var(--text-dim); margin-left: auto;">🔒 = recalculate with this parameter</span>
  </div>
  <div class="card-body">
    <!-- Every label below is bound to its control with for/id. The icon-only lock buttons
         carry an aria-label for their name, and their decorative SVGs are hidden from
         assistive technology. -->
    <div class="form-grid">
      <div class="input-group">
        <div class="label-row">
          <label for="quant">Quantization Method</label>
          <span id="quant-optimal" class="optimal-badge hidden">✓ Optimal</span>
        </div>
        <div class="lockable-input">
          <select id="quant">
            <optgroup label="High Quality">
              <option value="FP16">FP16 (16.0 bpw)</option>
              <option value="Q8_0">Q8_0 (8.5 bpw)</option>
              <option value="Q6_K">Q6_K (6.59 bpw)</option>
            </optgroup>
            <optgroup label="Balanced">
              <option value="Q5_K_M">Q5_K_M (5.69 bpw)</option>
              <option value="Q5_K_S">Q5_K_S (5.54 bpw)</option>
              <option value="Q4_K_M" selected>Q4_K_M (4.85 bpw)</option>
              <option value="Q4_K_S">Q4_K_S (4.58 bpw)</option>
              <option value="Q4_0">Q4_0 (4.55 bpw)</option>
            </optgroup>
            <optgroup label="Aggressive">
              <option value="Q3_K_M">Q3_K_M (3.91 bpw)</option>
              <option value="Q3_K_S">Q3_K_S (3.5 bpw)</option>
              <option value="Q2_K">Q2_K (3.35 bpw)</option>
              <option value="IQ3_XXS">IQ3_XXS (3.06 bpw)</option>
              <option value="IQ2_XXS">IQ2_XXS (2.06 bpw)</option>
            </optgroup>
          </select>
          <button type="button" class="lock-btn" id="lock-quant" title="Lock: optimizer won't change this setting" aria-label="Lock quantization method">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
              <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
              <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path>
            </svg>
          </button>
        </div>
      </div>
      <div class="input-group">
        <div class="label-row">
          <label for="cache-type">KV Cache Precision</label>
          <span id="cache-optimal" class="optimal-badge hidden">✓ Optimal</span>
        </div>
        <div class="lockable-input">
          <select id="cache-type">
            <option value="fp16" selected>FP16 (Standard)</option>
            <option value="q8_0">Q8_0 (Compressed)</option>
            <option value="q4_0">Q4_0 (Highly Compressed)</option>
          </select>
          <button type="button" class="lock-btn" id="lock-cache" title="Lock: optimizer won't change this setting" aria-label="Lock KV cache precision">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
              <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
              <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path>
            </svg>
          </button>
        </div>
      </div>
    </div>
    <div class="form-grid">
      <div class="input-group">
        <div class="label-row">
          <label for="context">Context Length</label>
          <span id="context-optimal" class="optimal-badge hidden">✓ Optimal</span>
        </div>
        <div class="lockable-input">
          <input type="number" id="context" value="8192" step="1024" min="512">
          <button type="button" class="lock-btn" id="lock-context" title="Lock: optimizer won't change this setting" aria-label="Lock context length">
            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
              <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
              <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path>
            </svg>
          </button>
        </div>
      </div>
      <div class="input-group">
        <label for="batch">Batch Size</label>
        <input type="number" id="batch" value="512" step="1" min="1">
      </div>
    </div>
    <div class="input-group">
      <label for="framework">Framework</label>
      <select id="framework">
        <option value="llama.cpp" selected>llama.cpp (Efficient)</option>
        <option value="ExLlamaV2">ExLlamaV2 (Very Efficient)</option>
        <option value="vLLM">vLLM (Production)</option>
        <option value="transformers">HuggingFace Transformers (Heavy)</option>
      </select>
    </div>
    <div class="form-grid">
      <!-- These labels wrap their checkboxes, so no for/id pairing is needed. -->
      <label class="checkbox-group">
        <input type="checkbox" id="flash-attn" checked>
        <span>Flash Attention</span>
      </label>
      <label class="checkbox-group hidden" id="mmproj-wrapper">
        <input type="checkbox" id="mmproj" checked>
        <span>Vision Adapter</span>
      </label>
    </div>
  </div>
</div>
</div>
<!-- Right: Results -->
<div class="results-column">
<div class="card sticky-panel">
  <div class="card-header">
    <span>Estimation Results</span>
  </div>
  <div class="card-body">
    <div style="text-align: center; margin-bottom: 1.5rem;">
      <div style="font-size: 0.8rem; color: var(--text-muted); text-transform: uppercase;">Estimated Usage</div>
      <!-- Generic monospace fallback added so the figure stays monospaced if the web font
           fails to load, matching the stylesheet's other JetBrains Mono declarations. -->
      <div style="font-size: 2.5rem; font-weight: 700; color: var(--text-main); font-family: 'JetBrains Mono', monospace;">
        <span id="total-vram">0.0</span> <span style="font-size: 1rem; color: var(--text-dim);">GB</span>
      </div>
    </div>
    <div class="viz-container">
      <!-- Stacked usage bar: the script sets each segment's width; the overload segment starts hidden. -->
      <div class="viz-bar-wrapper">
        <div id="limit-line" class="limit-line"></div>
        <div class="viz-segment seg-model" id="seg-model" style="width:0%"></div>
        <div class="viz-segment seg-kv" id="seg-kv" style="width:0%"></div>
        <div class="viz-segment seg-sys" id="seg-sys" style="width:0%"></div>
        <div class="viz-segment seg-over" id="seg-over" style="width:0%; display:none;"></div>
      </div>
      <div class="legend">
        <div class="legend-item"><div class="dot seg-model"></div> Model</div>
        <div class="legend-item"><div class="dot seg-kv"></div> Context</div>
        <div class="legend-item"><div class="dot seg-sys"></div> Overhead</div>
        <div class="legend-item hidden" id="legend-over"><div class="dot seg-over"></div> Overload</div>
      </div>
    </div>
    <div id="rec-container" class="hidden">
      <div id="rec-box" class="recs-box">
        <div class="recs-title" id="rec-title"></div>
        <div id="rec-text"></div>
      </div>
    </div>
  </div>
</div>
</div>
</div>
<div class="gpu-section">
  <div class="gpu-section-title">
    <span>Compatibility Matrix</span>
    <!-- There is no visible label for this filter, so it gets its name from aria-label. -->
    <select id="gpu-filter" aria-label="Filter GPUs by category">
      <option value="">All GPUs</option>
      <option value="consumer">Consumer</option>
      <option value="datacenter">Datacenter</option>
    </select>
  </div>
  <!-- GPU cards are rendered into this grid by the script. -->
  <div id="gpu-grid" class="gpu-grid"></div>
</div>
</div>
<div class="mobile-footer">
  <div>
    <div style="font-size: 0.75rem; color: var(--text-muted);">Total VRAM</div>
    <div style="font-weight: 700; color: var(--text-main); font-size: 1.2rem;">
      <span id="mobile-vram-val">0.0</span> GB
    </div>
  </div>
  <!-- Explicit type so the button can never act as a submit control if it ends up inside a form. -->
  <button type="button" onclick="window.scrollTo({top: 0, behavior: 'smooth'})" class="btn-primary" style="padding: 0.5rem 1rem;">Edit</button>
</div>
<script>
// ============================================
// CONSTANTS
// ============================================
// Bits-per-weight for each quantization format offered in the "quant" select.
// Entries are listed from highest to lowest bpw; QUANTS_ORDERED below reuses this key order,
// so new entries must be inserted at the right position.
const BPW = {
    'FP16': 16,
    'Q8_0': 8.5,
    'Q6_K': 6.59,
    'Q5_K_M': 5.69,
    'Q5_K_S': 5.54,
    'Q4_K_M': 4.85,
    'Q4_K_S': 4.58,
    'Q4_0': 4.55,
    'Q3_K_M': 3.91,
    'Q3_K_S': 3.5,
    'Q2_K': 3.35,
    'IQ3_XXS': 3.06,
    'IQ2_XXS': 2.06
};
// Priority ordered (highest quality first); string keys keep their insertion order.
const QUANTS_ORDERED = Object.keys(BPW);
// KV cache precisions, highest precision first.
const CACHE_ORDERED = ['fp16', 'q8_0', 'q4_0'];
// Context lengths tried by the optimizer, largest first.
const CONTEXT_TIERS = [131072, 65536, 32768, 16384, 8192, 4096, 2048];
// Known hardware as [name, VRAM in GB, category] rows, expanded into { name, vram, type } objects.
const GPUS = [
    ['NVIDIA RTX 4090', 24, 'consumer'],
    ['NVIDIA RTX 4080 Super', 16, 'consumer'],
    ['NVIDIA RTX 4080', 16, 'consumer'],
    ['NVIDIA RTX 4070 Ti Super', 16, 'consumer'],
    ['NVIDIA RTX 4070 Ti', 12, 'consumer'],
    ['NVIDIA RTX 4060 Ti 16GB', 16, 'consumer'],
    ['NVIDIA RTX 3090', 24, 'consumer'],
    ['NVIDIA A100 80GB', 80, 'datacenter'],
    ['NVIDIA A6000 Ada', 48, 'datacenter'],
    ['NVIDIA H100 80GB', 80, 'datacenter'],
    ['NVIDIA L40S', 48, 'datacenter'],
    ['Mac M3 Max (128GB)', 128, 'consumer'],
    ['Mac M3 Max (64GB)', 64, 'consumer'],
    ['Mac M3 Pro (36GB)', 36, 'consumer']
].map(([name, vram, type]) => ({ name, vram, type }));
// Mutable page state: the loaded model, the chosen GPU's VRAM (GB), and the last optimizer result.
let modelConfig = null;
let selectedGPUVRAM = null;
let optimalConfig = null;
// Lock states: true means the optimizer must keep the user's current value for that setting.
const locks = {
    quant: false,
    context: false,
    cache: false
};
// ============================================
// DOM ELEMENTS
// ============================================
// Cached references to the elements the script reads or updates, looked up once at load.
// Nested groups (specs, locks, optimal) mirror the groups of related elements in the markup.
const els = (() => {
    // Local shorthand for the id lookups below.
    const byId = (id) => document.getElementById(id);
    return {
        loadBtn: byId('load-btn'),
        modelPath: byId('model-path'),
        errorMsg: byId('error-msg'),
        configPanel: byId('config-panel'),
        modelSpecs: byId('model-specs'),
        totalVram: byId('total-vram'),
        mobileVram: byId('mobile-vram-val'),
        segModel: byId('seg-model'),
        segKv: byId('seg-kv'),
        segSys: byId('seg-sys'),
        segOver: byId('seg-over'),
        limitLine: byId('limit-line'),
        legendOver: byId('legend-over'),
        recContainer: byId('rec-container'),
        recBox: byId('rec-box'),
        recTitle: byId('rec-title'),
        recText: byId('rec-text'),
        gpuGrid: byId('gpu-grid'),
        lockHint: byId('lock-hint'),
        // Cells of the "Model Specifications" card.
        specs: {
            params: byId('spec-params'),
            hidden: byId('spec-hidden'),
            layers: byId('spec-layers'),
            heads: byId('spec-heads'),
            badge: byId('spec-arch-badge')
        },
        // Lock toggle buttons, keyed like the `locks` state object.
        locks: {
            quant: byId('lock-quant'),
            context: byId('lock-context'),
            cache: byId('lock-cache')
        },
        // "Optimal" badges shown next to each lockable setting.
        optimal: {
            quant: byId('quant-optimal'),
            context: byId('context-optimal'),
            cache: byId('cache-optimal')
        }
    };
})();
// ============================================
// GPU DROPDOWN
// ============================================
const gpuInput = document.getElementById('selected-gpu-input');
const gpuDropdown = document.getElementById('gpu-dropdown');
/**
 * Rebuild the GPU suggestion list inside #gpu-dropdown.
 *
 * Shows every entry of GPUS whose name contains `filterText` (case-insensitive), or a
 * single "No GPUs found" row when nothing matches. Clicking a suggestion copies its name
 * into the search input, records its VRAM in `selectedGPUVRAM`, closes the list and
 * triggers onHardwareChange().
 *
 * @param {string} [filterText=''] Text to match against GPU names.
 */
function renderDropdown(filterText = '') {
    gpuDropdown.innerHTML = '';
    const needle = filterText.toLowerCase();
    const matches = GPUS.filter(gpu => gpu.name.toLowerCase().includes(needle));
    if (matches.length === 0) {
        const emptyRow = document.createElement('div');
        emptyRow.className = 'dropdown-item';
        emptyRow.textContent = 'No GPUs found';
        emptyRow.style.color = 'var(--text-dim)';
        gpuDropdown.appendChild(emptyRow);
        return;
    }
    matches.forEach(gpu => {
        const row = document.createElement('div');
        row.className = 'dropdown-item';
        // Build the row from text nodes instead of interpolating into innerHTML, so a GPU
        // name containing markup characters is shown literally rather than parsed as HTML.
        const nameEl = document.createElement('span');
        nameEl.textContent = gpu.name;
        const metaEl = document.createElement('span');
        metaEl.className = 'dropdown-meta';
        metaEl.textContent = `${gpu.vram} GB`;
        row.append(nameEl, metaEl);
        row.onclick = () => {
            gpuInput.value = gpu.name;
            selectedGPUVRAM = gpu.vram;
            gpuDropdown.classList.remove('active');
            onHardwareChange();
        };
        gpuDropdown.appendChild(row);
    });
}
// Open the suggestion list, filtered by the current text, whenever the search box gains focus.
gpuInput.addEventListener('focus', () => {
    renderDropdown(gpuInput.value);
    gpuDropdown.classList.add('active');
});
// Re-filter while typing and keep the selected VRAM in sync with the text.
gpuInput.addEventListener('input', (e) => {
    const typed = e.target.value;
    renderDropdown(typed);
    // The text selects a GPU only when it equals a known name (case-insensitive). Anything
    // else, including an empty box or a half-edited name, clears the selection; previously
    // a non-matching edit left the old GPU's VRAM in effect.
    const match = GPUS.find(g => g.name.toLowerCase() === typed.toLowerCase());
    selectedGPUVRAM = match ? match.vram : null;
    onHardwareChange();
});
// Close the list on any click outside the search box and the list itself.
document.addEventListener('click', (e) => {
    if (!gpuInput.contains(e.target) && !gpuDropdown.contains(e.target)) gpuDropdown.classList.remove('active');
});
// ============================================
// LOCK TOGGLES
// ============================================
// Wire each lock button: a click flips the matching flag in `locks`, updates the button's
// look, icon, tooltip and pressed state, then re-runs the hardware-driven update.
Object.keys(els.locks).forEach(key => {
    const button = els.locks[key];

    // Padlock icon markup; only the shackle path differs between the two states.
    const iconMarkup = (isLocked) => {
        const shackle = isLocked
            ? 'M7 11V7a5 5 0 0 1 10 0v4'
            : 'M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1';
        return `
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
<path d="${shackle}"></path>
</svg>`;
    };

    // Expose the toggle state to assistive technology, starting from the initial unlocked state.
    button.setAttribute('aria-pressed', String(locks[key]));

    button.addEventListener('click', () => {
        locks[key] = !locks[key];
        const isLocked = locks[key];
        button.classList.toggle('locked', isLocked);
        button.setAttribute('aria-pressed', String(isLocked));
        // Update icon to locked/unlocked
        button.innerHTML = iconMarkup(isLocked);
        button.title = isLocked
            ? 'Locked: optimizer will keep this setting'
            : 'Lock: optimizer won\'t change this setting';
        onHardwareChange();
    });
});
// ============================================
// EVENT LISTENERS
// ============================================
// Load the model from the button or by pressing Enter in the model path field.
els.loadBtn.addEventListener('click', loadModel);
els.modelPath.addEventListener('keydown', (event) => {
    if (event.key === 'Enter') loadModel();
});
// Controls whose value feeds the estimate; any change recalculates.
const inputs = ['quant', 'cache-type', 'context', 'batch', 'framework', 'flash-attn', 'mmproj', 'gpu-filter'];
for (const id of inputs) {
    const control = document.getElementById(id);
    if (!control) continue;
    control.addEventListener('change', calculate);
    // <input> elements also recalculate on every 'input' event, not just on 'change'.
    if (control.tagName === 'INPUT') control.addEventListener('input', calculate);
}
// ============================================
// MODEL LOADING
// ============================================
/**
 * Fetch a model's config.json from HuggingFace, derive the figures the calculator needs,
 * store them in `modelConfig`, and enable the configuration panel.
 *
 * Reads the model path and optional token from the page. While the request is running the
 * load button is disabled and shows a spinner. Failures are reported through showError()
 * and never thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function loadModel() {
    const path = els.modelPath.value.trim();
    if (!path) { showError('Enter model path'); return; }
    showError('');
    els.loadBtn.disabled = true;
    els.loadBtn.innerHTML = '<div class="spinner"></div>';
    try {
        const token = document.getElementById('hf-token').value.trim();
        const headers = token ? { 'Authorization': `Bearer ${token}` } : {};
        const res = await fetch(`https://huggingface.co/${path}/raw/main/config.json`, { headers });
        if (!res.ok) {
            // 401: authentication required. 403: the request was understood but access is
            // refused, so report it as an access problem rather than "not found".
            if (res.status === 401) throw new Error('Model is gated. Provide HF Token.');
            if (res.status === 403) throw new Error('Access denied. Request access to this model on HuggingFace or check your HF Token.');
            throw new Error('Model not found');
        }
        const rootConfig = await res.json();
        // Some configs nest the language-model settings under text_config. Size fields are
        // read from there, but `architectures` is looked up on the top-level object first;
        // reading it only from the nested object lost the architecture name and the vision
        // check whenever a config was wrapped this way.
        const config = rootConfig.text_config || rootConfig;
        const architecture = rootConfig.architectures?.[0] || config.architectures?.[0];

        const hidden = config.hidden_size || config.d_model || 4096;
        const layers = config.num_hidden_layers || config.n_layer || 32;
        const heads = config.num_attention_heads || config.n_head || 32;
        const kvHeads = config.num_key_value_heads || heads;

        let params = 0;
        try {
            const idx = await fetch(`https://huggingface.co/${path}/resolve/main/model.safetensors.index.json`, { headers });
            if (idx.ok) {
                const data = await idx.json();
                // total_size / 2 treats the checkpoint as 2 bytes per parameter.
                // NOTE(review): this over- or under-counts for checkpoints stored at another width.
                params = data.metadata?.total_size ? (data.metadata.total_size / 2) / 1e9 : 0;
            }
        } catch (e) {
            // Best effort only: the index file is optional, so fall through to the estimate below.
        }
        if (!params) {
            // Rough estimate from the architecture: 12 * hidden^2 per layer plus one embedding
            // matrix. Uses the config's vocabulary size when present (32000 otherwise).
            const vocab = config.vocab_size || 32000;
            params = ((hidden * hidden * layers * 12) + (hidden * vocab)) / 1e9;
        }

        modelConfig = {
            name: path,
            params: params,
            layers: layers,
            hidden: hidden,
            heads: heads,
            kvHeads: kvHeads,
            isVision: architecture?.toLowerCase().includes('llava') || false,
            arch: architecture || 'Transformer'
        };

        els.configPanel.style.opacity = '1';
        els.configPanel.style.pointerEvents = 'auto';
        els.modelSpecs.classList.remove('hidden');
        els.specs.params.textContent = `${modelConfig.params.toFixed(1)}B`;
        els.specs.hidden.textContent = hidden;
        els.specs.layers.textContent = layers;
        els.specs.heads.textContent = `${heads} / ${kvHeads} KV`;
        els.specs.badge.textContent = modelConfig.arch;
        // The vision adapter checkbox is only offered for models detected as vision models.
        document.getElementById('mmproj-wrapper').classList.toggle('hidden', !modelConfig.isVision);
        onHardwareChange();
    } catch (err) {
        showError(err.message);
    } finally {
        // Restore the button whether the load succeeded or failed.
        els.loadBtn.disabled = false;
        els.loadBtn.textContent = 'Load Model';
    }
}
// ============================================
// VRAM CALCULATION
// ============================================
/**
 * Estimate VRAM use for the loaded model.
 *
 * Each setting is taken from `overrides` when supplied, otherwise from the matching form
 * control. Requires `modelConfig` to be set.
 *
 * @param {Object} [overrides]
 * @param {string}  [overrides.quant] Key of BPW.
 * @param {number}  [overrides.ctx]   Context length in tokens.
 * @param {string}  [overrides.cache] 'fp16', 'q8_0' or 'q4_0'.
 * @param {boolean} [overrides.flash] Whether Flash Attention is enabled.
 * @param {string}  [overrides.fw]    Framework name used for the overhead percentage.
 * @returns {{total: number, weights: number, kv: number, overhead: number}}
 */
function calculateVRAM(overrides = {}) {
    const quant = overrides.quant || document.getElementById('quant').value;
    const rawCtx = overrides.ctx !== undefined
        ? overrides.ctx
        : parseInt(document.getElementById('context').value, 10);
    // An empty or invalid context field used to turn the whole estimate into NaN; treat
    // anything that is not a positive number as "no context" instead.
    const ctxNumber = Number(rawCtx);
    const ctx = Number.isFinite(ctxNumber) && ctxNumber > 0 ? ctxNumber : 0;
    const cache = overrides.cache || document.getElementById('cache-type').value;
    const flash = overrides.flash !== undefined ? overrides.flash : document.getElementById('flash-attn').checked;
    const fw = overrides.fw || document.getElementById('framework').value;
    const vision = document.getElementById('mmproj').checked && modelConfig.isVision;
    // NOTE(review): the batch size field is not part of this estimate; the previous code
    // read it into a local that was never used.

    // Weights: params is in billions, so (billions * bits-per-weight) / 8 is decimal GB.
    const weights = (modelConfig.params * BPW[quant]) / 8;

    // KV cache: keys and values (factor 2) for every layer, with the hidden size scaled
    // down by the ratio of attention heads to KV heads.
    const kvRatio = modelConfig.heads / modelConfig.kvHeads;
    const elPerToken = 2 * modelConfig.layers * (modelConfig.hidden / kvRatio);
    const cacheBytes = cache === 'fp16' ? 2 : (cache === 'q8_0' ? 1 : 0.5);
    // NOTE(review): this term divides by 1024^3 while the weights term uses 1e9 — confirm
    // the mixed units are intended.
    let kv = (elPerToken * ctx * cacheBytes) / (1024 ** 3);
    if (flash) kv *= 0.65;

    // Overhead: a framework-specific fraction of the weights, plus a fixed 0.8 for the
    // vision adapter when it is enabled on a vision model.
    const frameworkOverheadPct = { 'llama.cpp': 0.15, 'ExLlamaV2': 0.14, 'vLLM': 0.12, 'transformers': 0.22 };
    const visionSize = vision ? 0.8 : 0;
    const overhead = (weights * frameworkOverheadPct[fw]) + visionSize;

    const total = weights + kv + overhead;
    return { total, weights, kv, overhead };
}
// ============================================
// OPTIMAL CONFIG FINDER
// ============================================
/**
 * Find the highest-priority configuration whose estimated VRAM fits within `vramLimit`.
 *
 * A setting present in `constraints` is fixed to that value; every other setting is
 * searched over its full candidate list. With all three fixed this reduces to a single
 * fit check.
 *
 * @param {number} vramLimit Available VRAM, in the same unit as calculateVRAM().total.
 * @param {{quant?: string, ctx?: number, cache?: string}} [constraints] Locked settings.
 * @returns {{quant: string, ctx: number, cache: string, vram: number, fits: true}|null}
 *          The first fitting combination, or null when no model is loaded or nothing fits.
 */
function findOptimalConfig(vramLimit, constraints = {}) {
    if (!modelConfig) return null;
    const flash = document.getElementById('flash-attn').checked;

    // A locked setting contributes a one-element candidate list.
    const quantCandidates = constraints.quant ? [constraints.quant] : QUANTS_ORDERED;
    const cacheCandidates = constraints.cache ? [constraints.cache] : CACHE_ORDERED;
    const contextCandidates = constraints.ctx
        ? [constraints.ctx]
        : CONTEXT_TIERS.filter(tier => tier <= 131072);

    // Greedy search, first fit wins. Loop nesting sets the priority: quant > cache > context,
    // so every context tier is tried at a given cache precision before that precision is lowered.
    for (const quant of quantCandidates) {
        for (const cache of cacheCandidates) {
            for (const ctx of contextCandidates) {
                const { total } = calculateVRAM({ quant, ctx, cache, flash });
                if (total <= vramLimit) {
                    return { quant, ctx, cache, vram: total, fits: true };
                }
            }
        }
    }
    return null; // Nothing fits
}
// ============================================
// MAIN UPDATE FLOW
// ============================================
/**
 * Recompute the optimal configuration for the selected GPU and refresh the
 * display. Does nothing until a model has been loaded.
 *
 * Locked settings are passed to the optimizer as constraints. The optimizer's
 * result is written back into the form only when no setting is locked; with
 * no GPU selected the stored optimum is cleared.
 */
function onHardwareChange() {
    if (!modelConfig) return;

    const gpuChosen = Boolean(selectedGPUVRAM);
    // The lock hint is only shown while a GPU is selected.
    els.lockHint.classList.toggle('hidden', !gpuChosen);

    const quantInput = document.getElementById('quant');
    const contextInput = document.getElementById('context');
    const cacheInput = document.getElementById('cache-type');

    // Every locked control contributes its current value as a constraint.
    const constraints = {};
    if (locks.quant) constraints.quant = quantInput.value;
    if (locks.context) constraints.ctx = parseInt(contextInput.value);
    if (locks.cache) constraints.cache = cacheInput.value;

    if (!gpuChosen) {
        optimalConfig = null;
    } else {
        optimalConfig = findOptimalConfig(selectedGPUVRAM, constraints);
        const anythingLocked = locks.quant || locks.context || locks.cache;
        // With nothing locked the optimum is applied straight to the form.
        if (!anythingLocked && optimalConfig) {
            quantInput.value = optimalConfig.quant;
            contextInput.value = optimalConfig.ctx;
            cacheInput.value = optimalConfig.cache;
        }
    }

    calculate();
}
/**
 * Recompute the estimate from the current form values and redraw the summary
 * panel and the GPU grid. Does nothing until a model has been loaded.
 */
function calculate() {
    if (!modelConfig) return;

    const { total, weights, kv, overhead } = calculateVRAM();
    updateUI(total, weights, kv, overhead);
    renderGrid(total);
}
/**
 * Paint the usage summary: the total readouts, the segmented bar, the
 * over-limit marker and the recommendation panel.
 *
 * @param {number} total Estimated total VRAM.
 * @param {number} w Share of the total taken by model weights.
 * @param {number} k Share taken by the KV cache.
 * @param {number} o Share taken by overhead.
 */
function updateUI(total, w, k, o) {
    const totalLabel = total.toFixed(1);
    els.totalVram.textContent = totalLabel;
    els.mobileVram.textContent = totalLabel;

    // Without a selected GPU the bar is scaled to the estimate itself.
    const limit = selectedGPUVRAM || total;
    const isOver = selectedGPUVRAM !== null && total > limit;
    // The bar spans whichever is larger, so segments never exceed 100%.
    const barSpan = isOver ? total : limit;
    const asShare = (amount) => `${(amount / barSpan) * 100}%`;

    els.segModel.style.width = asShare(w);
    els.segKv.style.width = asShare(k);
    els.segSys.style.width = asShare(o);

    // Update optimal badges
    updateOptimalBadges();

    if (!isOver) {
        els.segOver.style.display = 'none';
        els.limitLine.style.display = 'none';
        els.legendOver.classList.add('hidden');
        if (selectedGPUVRAM) {
            showFitRecommendation(total, limit);
        } else {
            els.recContainer.classList.add('hidden');
        }
        return;
    }

    // Over the limit: overlay the excess from the limit position to the bar's end.
    const limitPos = (limit / total) * 100;
    const overStyle = els.segOver.style;
    overStyle.display = 'block';
    overStyle.position = 'absolute';
    overStyle.left = `${limitPos}%`;
    overStyle.right = '0';
    overStyle.width = 'auto';

    const markerStyle = els.limitLine.style;
    markerStyle.display = 'block';
    markerStyle.left = `${limitPos}%`;

    els.legendOver.classList.remove('hidden');
    showOverflowRecommendation(total, limit);
}
/**
 * Show the "optimal" badge next to each setting whose current value matches
 * the stored optimal configuration. All badges are hidden when there is no
 * optimum or no GPU is selected.
 */
function updateOptimalBadges() {
    const badges = els.optimal;

    if (!optimalConfig || !selectedGPUVRAM) {
        [badges.quant, badges.context, badges.cache].forEach((badge) => {
            badge.classList.add('hidden');
        });
        return;
    }

    const chosen = {
        quant: document.getElementById('quant').value,
        ctx: parseInt(document.getElementById('context').value),
        cache: document.getElementById('cache-type').value
    };

    // A badge is hidden exactly when its setting differs from the optimum.
    badges.quant.classList.toggle('hidden', chosen.quant !== optimalConfig.quant);
    badges.context.classList.toggle('hidden', chosen.ctx !== optimalConfig.ctx);
    badges.cache.classList.toggle('hidden', chosen.cache !== optimalConfig.cache);
}
// ============================================
// RECOMMENDATIONS
// ============================================
/**
 * Fill the recommendation panel for a configuration that exceeds the GPU's
 * VRAM: either the steps that lead to the stored optimal configuration, or an
 * explanation that nothing fits (naming any locked settings).
 *
 * @param {number} currentTotal Estimated VRAM of the current configuration.
 * @param {number} limit VRAM of the selected GPU.
 */
function showOverflowRecommendation(currentTotal, limit) {
    els.recContainer.classList.remove('hidden');
    els.recBox.className = 'recs-box danger';

    const excess = currentTotal - limit;
    els.recTitle.innerHTML = `<span style="color:var(--danger)">⚠️ Over VRAM Limit by ${excess.toFixed(1)} GB</span>`;

    if (!optimalConfig) {
        // Nothing fits: report which locked settings may be responsible.
        const lockedSettings = [
            ['quant', 'quantization'],
            ['context', 'context length'],
            ['cache', 'cache precision']
        ].filter(([key]) => locks[key]).map(([, label]) => label);

        const reason = lockedSettings.length > 0
            ? ` with locked ${lockedSettings.join(', ')}. Try unlocking some settings.`
            : ' even with maximum optimization.';

        els.recText.innerHTML = '<div style="color:var(--text-muted);">This model cannot fit on your GPU' + reason + '</div>';
        return;
    }

    const currentQuant = document.getElementById('quant').value;
    const currentCtx = parseInt(document.getElementById('context').value);
    const currentCache = document.getElementById('cache-type').value;

    // One step per setting that differs from the optimum.
    const steps = [];
    if (optimalConfig.quant !== currentQuant) {
        steps.push(`<div class="rec-step"><span class="rec-tag tag-quant">QUANT</span> Switch to <b>${optimalConfig.quant}</b></div>`);
    }
    if (optimalConfig.ctx !== currentCtx) {
        const verb = optimalConfig.ctx < currentCtx ? 'Reduce' : 'Increase';
        steps.push(`<div class="rec-step"><span class="rec-tag tag-ctx">CONTEXT</span> ${verb} to <b>${optimalConfig.ctx.toLocaleString()}</b></div>`);
    }
    if (optimalConfig.cache !== currentCache) {
        steps.push(`<div class="rec-step"><span class="rec-tag tag-cache">CACHE</span> Switch to <b>${optimalConfig.cache.toUpperCase()}</b></div>`);
    }

    let html = '<div class="rec-solution">Suggested changes to fit:</div>' + steps.join('');
    // The usage summary is only shown when there is at least one step.
    if (steps.length > 0) {
        html += `<div style="margin-top:0.75rem; color:var(--text-muted); font-size:0.85rem;">This would use ${optimalConfig.vram.toFixed(1)} GB (${(limit - optimalConfig.vram).toFixed(1)} GB free)</div>`;
    }
    els.recText.innerHTML = html;
}
/**
 * Fill the recommendation panel for a configuration that fits the GPU.
 *
 * Three outcomes: the current settings equal the stored optimum, a different
 * optimum exists (the differing settings are listed), or no optimum is stored
 * (only the remaining headroom is reported).
 *
 * @param {number} currentTotal Estimated VRAM of the current configuration.
 * @param {number} limit VRAM of the selected GPU.
 */
function showFitRecommendation(currentTotal, limit) {
    els.recContainer.classList.remove('hidden');

    const headroom = limit - currentTotal;
    const currentQuant = document.getElementById('quant').value;
    const currentCtx = parseInt(document.getElementById('context').value);
    const currentCache = document.getElementById('cache-type').value;

    if (!optimalConfig) {
        // Fallback when no optimal configuration is stored.
        els.recBox.className = 'recs-box';
        els.recTitle.innerHTML = `<span style="color:var(--success)">✅ Configuration Valid</span>`;
        els.recText.innerHTML = `<div style="color:var(--text-muted);">${headroom.toFixed(1)} GB headroom remaining.</div>`;
        return;
    }

    const quantDiffers = optimalConfig.quant !== currentQuant;
    const ctxDiffers = optimalConfig.ctx !== currentCtx;
    const cacheDiffers = optimalConfig.cache !== currentCache;

    if (!quantDiffers && !ctxDiffers && !cacheDiffers) {
        // Already at the optimum; warn when less than 1.0 of headroom is left.
        els.recBox.className = 'recs-box';
        els.recTitle.innerHTML = `<span style="color:var(--success)">✅ Optimal Configuration</span>`;
        els.recText.innerHTML = headroom < 1.0
            ? '<div style="color:var(--text-muted);">Fit is tight. Consider closing other GPU applications.</div>'
            : `<div style="color:var(--text-muted);">Best settings for your hardware. ${headroom.toFixed(1)} GB headroom.</div>`;
        return;
    }

    // Show delta from optimal
    els.recBox.className = 'recs-box warning';
    els.recTitle.innerHTML = `<span style="color:var(--warning)">💡 Better Configuration Available</span>`;

    const parts = ['<div class="rec-solution">For optimal performance:</div>'];
    if (quantDiffers) {
        // A lower index in QUANTS_ORDERED is reported as an upgrade.
        const optimalRank = QUANTS_ORDERED.indexOf(optimalConfig.quant);
        const currentRank = QUANTS_ORDERED.indexOf(currentQuant);
        const direction = optimalRank < currentRank ? 'Upgrade' : 'Downgrade';
        parts.push(`<div class="rec-step"><span class="rec-tag tag-quant">QUANT</span> ${direction} to <b>${optimalConfig.quant}</b></div>`);
    }
    if (ctxDiffers) {
        const direction = optimalConfig.ctx > currentCtx ? 'Increase' : 'Reduce';
        parts.push(`<div class="rec-step"><span class="rec-tag tag-ctx">CONTEXT</span> ${direction} to <b>${optimalConfig.ctx.toLocaleString()}</b></div>`);
    }
    if (cacheDiffers) {
        parts.push(`<div class="rec-step"><span class="rec-tag tag-cache">CACHE</span> Switch to <b>${optimalConfig.cache.toUpperCase()}</b></div>`);
    }
    els.recText.innerHTML = parts.join('');
}
// ============================================
// GPU GRID
// ============================================
/**
 * Redraw the GPU comparison grid for a given VRAM requirement.
 *
 * Each GPU in GPUS (optionally narrowed to the type chosen in the
 * `gpu-filter` control) gets a card with a usage bar and a verdict:
 * "Comfortable", "Tight Fit" (fits but uses more than 90% of the card's
 * VRAM) or "Insufficient".
 *
 * The markup for all cards is built first and written to the grid in one
 * assignment; appending with `innerHTML +=` per card would make the browser
 * re-serialize and re-parse the whole grid on every iteration.
 *
 * @param {number} req Required VRAM, in the same unit as `gpu.vram`.
 */
function renderGrid(req) {
    const filter = document.getElementById('gpu-filter').value;
    const list = filter ? GPUS.filter(g => g.type === filter) : GPUS;

    const cards = list.map(gpu => {
        const percent = (req / gpu.vram) * 100;
        const isSafe = req <= gpu.vram;
        const isTight = isSafe && req > (gpu.vram * 0.9);
        const color = isSafe ? (isTight ? 'var(--warning)' : 'var(--success)') : 'var(--danger)';
        const verdict = isSafe ? (isTight ? 'Tight Fit' : 'Comfortable') : 'Insufficient';

        // The bar is capped at 100% while the label keeps the real percentage.
        return `
<div class="gpu-card" style="${!isSafe ? 'opacity:0.8;' : ''}">
<div class="gpu-top">
<span class="gpu-name">${gpu.name}</span>
<span class="gpu-vram">${gpu.vram} GB</span>
</div>
<div class="gpu-bar-bg">
<div class="gpu-bar-fill" style="width:${Math.min(percent, 100)}%; background:${color}"></div>
</div>
<div style="display:flex; justify-content:space-between; font-size:0.8rem;">
<span style="color:${color}; font-weight:500">${verdict}</span>
<span style="color:var(--text-dim)">${percent.toFixed(0)}% usage</span>
</div>
</div>
`;
    });

    els.gpuGrid.innerHTML = cards.join('');
}
/**
 * Display an error message in the page's error element.
 *
 * @param {string} msg Text to show; written as plain text, not markup.
 */
function showError(msg) {
    const errorTarget = els.errorMsg;
    errorTarget.textContent = msg;
}
</script>
</div>
</body>
</html>