Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>VRAM Estimator | NovaAI</title> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"> | |
| <style> | |
| :root { | |
| --bg-body: #0f0f0f; | |
| --bg-card: #141414; | |
| --bg-card-hover: #1a1a1a; | |
| --bg-input: #0a0a0a; | |
| --border-color: #27272a; | |
| --border-focus: #3b82f6; | |
| --text-main: #e4e4e7; | |
| --text-muted: #a1a1aa; | |
| --text-dim: #52525b; | |
| --accent: #3b82f6; | |
| --accent-glow: rgba(59, 130, 246, 0.15); | |
| --color-weights: #3b82f6; | |
| --color-cache: #8b5cf6; | |
| --color-overhead: #71717a; | |
| --color-overload: #ef4444; | |
| --success: #10b981; | |
| --warning: #f59e0b; | |
| --danger: #ef4444; | |
| } | |
| /* Prevent body conflicts with WordPress theme */ | |
| body.page .vram-calculator-wrapper { | |
| display: block ; | |
| } | |
| /* Calculator wrapper - contains all styles */ | |
| .vram-calculator-wrapper { | |
| background-color: var(--bg-body); | |
| color: var(--text-main); | |
| font-family: 'Inter', sans-serif; | |
| font-size: 14px; | |
| line-height: 1.5; | |
| min-height: 100vh; | |
| } | |
| .vram-calculator-wrapper * { | |
| margin: 0; | |
| padding: 0; | |
| box-sizing: border-box; | |
| } | |
| .vram-calculator-wrapper .app-container { | |
| max-width: 1400px; | |
| margin: 0 auto; | |
| padding: 2rem; | |
| width: 100%; | |
| flex: 1; | |
| display: flex; | |
| flex-direction: column; | |
| gap: 2rem; | |
| } | |
| .vram-calculator-wrapper header { | |
| display: flex; | |
| justify-content: space-between; | |
| align-items: center; | |
| padding-bottom: 1.5rem; | |
| border-bottom: 1px solid var(--border-color); | |
| } | |
| .vram-calculator-wrapper h1 { font-size: 1.25rem; font-weight: 700; letter-spacing: -0.025em; } | |
| .vram-calculator-wrapper .brand { color: var(--accent); } | |
| .vram-calculator-wrapper .dashboard { | |
| display: grid; | |
| grid-template-columns: 1fr 400px; | |
| gap: 2rem; | |
| align-items: start; | |
| } | |
| @media (max-width: 1024px) { | |
| .vram-calculator-wrapper .dashboard { grid-template-columns: 1fr; } | |
| } | |
| /* --- Cards --- */ | |
| .vram-calculator-wrapper .card { | |
| background: var(--bg-card); | |
| border: 1px solid var(--border-color); | |
| border-radius: 12px; | |
| transition: border-color 0.2s; | |
| position: relative; | |
| } | |
| .vram-calculator-wrapper .card-header { | |
| padding: 1rem 1.5rem; | |
| border-bottom: 1px solid var(--border-color); | |
| background: rgba(255,255,255,0.02); | |
| font-weight: 600; | |
| display: flex; | |
| align-items: center; | |
| gap: 0.5rem; | |
| color: var(--text-main); | |
| border-radius: 12px 12px 0 0; | |
| } | |
| .vram-calculator-wrapper .card-body { padding: 1.5rem; } | |
| /* --- Inputs --- */ | |
| .vram-calculator-wrapper .input-group { margin-bottom: 1.25rem; position: relative; } | |
| .vram-calculator-wrapper .input-group:last-child { margin-bottom: 0; } | |
| .vram-calculator-wrapper .input-group label { | |
| display: block; | |
| color: var(--text-muted); | |
| font-size: 0.85rem; | |
| margin-bottom: 0.5rem; | |
| font-weight: 500; | |
| } | |
| .vram-calculator-wrapper input[type="text"], | |
| .vram-calculator-wrapper input[type="number"], | |
| .vram-calculator-wrapper input[type="password"], | |
| .vram-calculator-wrapper select { | |
| width: 100%; | |
| background: var(--bg-input) ; | |
| border: 1px solid var(--border-color) ; | |
| color: var(--text-main) ; | |
| padding: 0 1rem ; | |
| height: 42px ; | |
| border-radius: 6px ; | |
| font-family: inherit ; | |
| font-size: 0.9rem ; | |
| transition: all 0.2s ease ; | |
| -webkit-appearance: none ; | |
| -moz-appearance: none ; | |
| appearance: none ; | |
| background-image: none ; | |
| } | |
| /* Custom dropdown arrow for selects */ | |
| .vram-calculator-wrapper select { | |
| background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a1a1aa' d='M6 9L1 4h10z'/%3E%3C/svg%3E") ; | |
| background-repeat: no-repeat ; | |
| background-position: right 1rem center ; | |
| padding-right: 2.5rem ; | |
| } | |
| /* Remove any WordPress/theme pseudo-elements */ | |
| .vram-calculator-wrapper select::after, | |
| .vram-calculator-wrapper select::before { | |
| display: none ; | |
| content: none ; | |
| } | |
| .vram-calculator-wrapper input:focus, | |
| .vram-calculator-wrapper select:focus { | |
| outline: none ; | |
| border-color: var(--border-focus) ; | |
| box-shadow: 0 0 0 2px var(--accent-glow) ; | |
| } | |
| .vram-calculator-wrapper .form-row { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 1.5rem; | |
| margin-top: 1.25rem; | |
| } | |
| @media (max-width: 700px) { | |
| .vram-calculator-wrapper .form-row { grid-template-columns: 1fr; gap: 1rem; } | |
| } | |
| .vram-calculator-wrapper .form-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); | |
| gap: 1rem; | |
| } | |
| .vram-calculator-wrapper .btn-primary { | |
| background: var(--accent); | |
| color: white; | |
| border: none; | |
| padding: 0 1.5rem; | |
| height: 42px; | |
| border-radius: 6px; | |
| font-weight: 600; | |
| cursor: pointer; | |
| transition: opacity 0.2s; | |
| white-space: nowrap; | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| } | |
| .vram-calculator-wrapper .btn-primary:hover { opacity: 0.9; } | |
| /* --- Visualization --- */ | |
| .vram-calculator-wrapper .viz-container { margin-top: 1.5rem; } | |
| .vram-calculator-wrapper .viz-bar-wrapper { height: 32px; background: var(--bg-input); border-radius: 6px; display: flex; overflow: hidden; position: relative; } | |
| .vram-calculator-wrapper .viz-segment { height: 100%; transition: width 0.4s ease; } | |
| .vram-calculator-wrapper .seg-model { background: var(--color-weights); } | |
| .vram-calculator-wrapper .seg-kv { background: var(--color-cache); } | |
| .vram-calculator-wrapper .seg-sys { background: var(--color-overhead); } | |
| .vram-calculator-wrapper .seg-over { background: repeating-linear-gradient(45deg, var(--color-overload), var(--color-overload) 10px, #b91c1c 10px, #b91c1c 20px); } | |
| .vram-calculator-wrapper .legend { display: flex; gap: 1.25rem; margin-top: 1rem; flex-wrap: wrap; } | |
| .vram-calculator-wrapper .legend-item { display: flex; align-items: center; gap: 0.4rem; font-size: 0.8rem; color: var(--text-muted); } | |
| .vram-calculator-wrapper .dot { width: 10px; height: 10px; border-radius: 3px; } | |
| .vram-calculator-wrapper .dot.seg-model { background: var(--color-weights); } | |
| .vram-calculator-wrapper .dot.seg-kv { background: var(--color-cache); } | |
| .vram-calculator-wrapper .dot.seg-sys { background: var(--color-overhead); } | |
| .vram-calculator-wrapper .dot.seg-over { background: var(--color-overload); } | |
| .vram-calculator-wrapper .limit-line { position: absolute; top: -4px; bottom: -4px; width: 3px; background: var(--text-main); border-radius: 2px; z-index: 10; display: none; box-shadow: 0 0 8px rgba(255,255,255,0.5); } | |
| /* Recommendations */ | |
| .vram-calculator-wrapper .recs-box { margin-top: 1.5rem; padding: 1rem; border-radius: 8px; background: rgba(16, 185, 129, 0.1); border: 1px solid rgba(16, 185, 129, 0.2); } | |
| .vram-calculator-wrapper .recs-box.warning { background: rgba(245, 158, 11, 0.1); border-color: rgba(245, 158, 11, 0.3); } | |
| .vram-calculator-wrapper .recs-box.danger { background: rgba(239, 68, 68, 0.1); border-color: rgba(239, 68, 68, 0.3); } | |
| .vram-calculator-wrapper .recs-title { font-weight: 600; margin-bottom: 0.75rem; } | |
| .vram-calculator-wrapper .rec-step { display: flex; align-items: center; gap: 0.5rem; margin: 0.5rem 0; font-size: 0.9rem; } | |
| .vram-calculator-wrapper .rec-tag { font-size: 0.7rem; padding: 2px 6px; border-radius: 4px; font-weight: 600; text-transform: uppercase; } | |
| .vram-calculator-wrapper .tag-quant { background: rgba(59, 130, 246, 0.2); color: #60a5fa; } | |
| .vram-calculator-wrapper .tag-ctx { background: rgba(139, 92, 246, 0.2); color: #a78bfa; } | |
| .vram-calculator-wrapper .tag-cache { background: rgba(16, 185, 129, 0.2); color: #34d399; } | |
| .vram-calculator-wrapper .rec-solution { font-weight: 500; margin-bottom: 0.5rem; color: var(--text-main); } | |
| /* Specs Grid */ | |
| .vram-calculator-wrapper .specs-grid { display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem; } | |
| .vram-calculator-wrapper .spec-item { display: flex; flex-direction: column; gap: 0.25rem; } | |
| .vram-calculator-wrapper .spec-label { font-size: 0.75rem; color: var(--text-dim); text-transform: uppercase; } | |
| .vram-calculator-wrapper .spec-val { font-family: 'JetBrains Mono', monospace; font-size: 1rem; color: var(--text-main); } | |
| /* Searchable dropdown */ | |
| .vram-calculator-wrapper .dropdown-container { position: relative; } | |
| .vram-calculator-wrapper .dropdown-results { position: absolute; top: 100%; left: 0; right: 0; background: var(--bg-card); border: 1px solid var(--border-color); border-radius: 6px; max-height: 200px; overflow-y: auto; z-index: 50; display: none; margin-top: 4px; } | |
| .vram-calculator-wrapper .dropdown-results.active { display: block; } | |
| .vram-calculator-wrapper .dropdown-item { padding: 0.75rem 1rem; cursor: pointer; display: flex; justify-content: space-between; align-items: center; } | |
| .vram-calculator-wrapper .dropdown-item:hover { background: var(--bg-card-hover); } | |
| .vram-calculator-wrapper .dropdown-meta { font-size: 0.8rem; color: var(--text-dim); font-family: 'JetBrains Mono', monospace; } | |
| /* GPU Grid */ | |
| .vram-calculator-wrapper .gpu-section { margin-top: 2rem; } | |
| .vram-calculator-wrapper .gpu-section-title { | |
| font-size: 1rem; | |
| font-weight: 600; | |
| margin-bottom: 1rem; | |
| color: var(--text-main); | |
| display: flex; | |
| justify-content: space-between; | |
| align-items: center; | |
| gap: 1rem; | |
| flex-wrap: wrap; | |
| } | |
| .vram-calculator-wrapper .gpu-section-title select { | |
| max-width: 200px; | |
| min-width: 150px; | |
| flex-shrink: 0; | |
| } | |
| @media (max-width: 600px) { | |
| .vram-calculator-wrapper .gpu-section-title { | |
| flex-direction: column; | |
| align-items: flex-start; | |
| } | |
| .vram-calculator-wrapper .gpu-section-title select { | |
| width: 100%; | |
| max-width: none; | |
| } | |
| } | |
| .vram-calculator-wrapper .gpu-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 1rem; } | |
| .vram-calculator-wrapper .gpu-card { background: var(--bg-card); border: 1px solid var(--border-color); padding: 1.25rem; border-radius: 10px; transition: all 0.2s; } | |
| .vram-calculator-wrapper .gpu-card:hover { transform: translateY(-2px); border-color: var(--border-focus); } | |
| .vram-calculator-wrapper .gpu-top { display: flex; justify-content: space-between; margin-bottom: 1rem; } | |
| .vram-calculator-wrapper .gpu-name { font-weight: 600; font-size: 0.95rem; color: var(--text-main); } | |
| .vram-calculator-wrapper .gpu-vram { font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; color: var(--text-muted); background: rgba(255,255,255,0.05); padding: 2px 6px; border-radius: 4px; } | |
| .vram-calculator-wrapper .gpu-bar-bg { height: 6px; background: var(--bg-input); border-radius: 3px; overflow: hidden; margin-bottom: 0.75rem; } | |
| .vram-calculator-wrapper .gpu-bar-fill { height: 100%; border-radius: 3px; transition: width 0.3s ease; } | |
| /* Utility class for toggled elements. Needs !important: .checkbox-group and .optimal-badge are declared later with the same specificity and would otherwise win the display cascade. */ | |
| .vram-calculator-wrapper .hidden { display: none !important; } | |
| .vram-calculator-wrapper .sticky-panel { position: sticky; top: 2rem; } | |
| .vram-calculator-wrapper .checkbox-group { display: flex; align-items: center; gap: 0.5rem; cursor: pointer; user-select: none; padding: 0.75rem; border: 1px solid var(--border-color); border-radius: 6px; background: var(--bg-input); } | |
| .vram-calculator-wrapper .spinner { width: 16px; height: 16px; border: 2px solid rgba(255,255,255,0.3); border-radius: 50%; border-top-color: #fff; animation: spin 0.8s linear infinite; } | |
| @keyframes spin { to { transform: rotate(360deg); } } | |
| ::-webkit-scrollbar { width: 8px; } | |
| ::-webkit-scrollbar-track { background: var(--bg-body); } | |
| ::-webkit-scrollbar-thumb { background: var(--border-color); border-radius: 4px; } | |
| ::-webkit-scrollbar-thumb:hover { background: var(--text-dim); } | |
| .vram-calculator-wrapper .mobile-footer { display: none; position: fixed; bottom: 0; left: 0; right: 0; background: var(--bg-card); border-top: 1px solid var(--border-color); padding: 1rem; z-index: 100; box-shadow: 0 -4px 20px rgba(0,0,0,0.5); } | |
| @media (max-width: 768px) { | |
| .vram-calculator-wrapper .mobile-footer { display: flex; justify-content: space-between; align-items: center; } | |
| .vram-calculator-wrapper .app-container { padding: 1rem; padding-bottom: 80px; } | |
| .vram-calculator-wrapper .gpu-grid { grid-template-columns: 1fr; } | |
| } | |
| /* Lock Toggle Styles */ | |
| .vram-calculator-wrapper .lockable-input { | |
| display: flex; | |
| gap: 0.5rem; | |
| align-items: stretch; | |
| } | |
| .vram-calculator-wrapper .lockable-input select, | |
| .vram-calculator-wrapper .lockable-input input { | |
| flex: 1; | |
| min-width: 0; | |
| } | |
| .vram-calculator-wrapper .lock-btn { | |
| width: 42px; | |
| height: 42px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| background: var(--bg-input); | |
| border: 1px solid var(--border-color); | |
| border-radius: 6px; | |
| cursor: pointer; | |
| transition: all 0.2s; | |
| color: var(--text-dim); | |
| flex-shrink: 0; | |
| } | |
| .vram-calculator-wrapper .lock-btn:hover { | |
| border-color: var(--text-muted); | |
| color: var(--text-muted); | |
| } | |
| .vram-calculator-wrapper .lock-btn.locked { | |
| background: rgba(59, 130, 246, 0.15); | |
| border-color: var(--accent); | |
| color: var(--accent); | |
| } | |
| .vram-calculator-wrapper .lock-btn svg { | |
| width: 16px; | |
| height: 16px; | |
| } | |
| /* Optimal badge */ | |
| .vram-calculator-wrapper .optimal-badge { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 0.25rem; | |
| font-size: 0.7rem; | |
| padding: 2px 6px; | |
| border-radius: 4px; | |
| background: rgba(16, 185, 129, 0.15); | |
| color: var(--success); | |
| font-weight: 500; | |
| margin-left: 0.5rem; | |
| } | |
| /* Label with badge container */ | |
| .vram-calculator-wrapper .label-row { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| margin-bottom: 0.5rem; | |
| } | |
| .vram-calculator-wrapper .label-row label { | |
| margin-bottom: 0; | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="vram-calculator-wrapper"> | |
| <div class="app-container"> | |
| <header> | |
| <h1>LLM VRAM <span class="brand">Calculator</span></h1> | |
| <span id="header-custom-text" style="color: var(--text-muted); font-size: 0.85rem;">HF version of the calculator found at https://novaml.ai/vram/</span> | |
| <!-- Reset reloads the page: an in-page action, so it is a real button instead of a link with a placeholder href. --> | |
| <button type="button" onclick="window.location.reload()" style="background: none; border: none; color: var(--text-muted); cursor: pointer; font-family: inherit; font-size: 0.9rem;">Reset</button> | |
| </header> | |
| <!-- Main Loader Card --> | |
| <div class="card"> | |
| <div class="card-body"> | |
| <!-- Each label points at its control via for/id so it is announced by assistive tech and focuses the field on click. --> | |
| <div class="input-group"> | |
| <label for="model-path">HuggingFace Model Path</label> | |
| <div style="display: flex; gap: 1rem;"> | |
| <input type="text" id="model-path" placeholder="e.g. meta-llama/Llama-3.3-70B-Instruct"> | |
| <button type="button" id="load-btn" class="btn-primary">Load Model</button> | |
| </div> | |
| </div> | |
| <div class="form-row"> | |
| <div class="input-group"> | |
| <label for="hf-token">HF Token (Optional)</label> | |
| <input type="password" id="hf-token" placeholder="hf_..."> | |
| </div> | |
| <div class="input-group"> | |
| <label for="selected-gpu-input">Your Hardware (Optional)</label> | |
| <div class="dropdown-container"> | |
| <input type="text" id="selected-gpu-input" placeholder="Search GPU (e.g. 4090)" autocomplete="off"> | |
| <div class="dropdown-results" id="gpu-dropdown"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Live region: text written into this element later is announced without moving focus. --> | |
| <div id="error-msg" role="alert" style="color: var(--danger); font-size: 0.85rem; margin-top: 0.5rem;"></div> | |
| </div> | |
| </div> | |
| <div class="dashboard"> | |
| <!-- Left: Settings --> | |
| <div class="settings-column"> | |
| <div class="card hidden" id="model-specs" style="margin-bottom: 2rem; border-left: 3px solid var(--accent);"> | |
| <div class="card-header"> | |
| <span>Model Specifications</span> | |
| <span id="spec-arch-badge" style="font-size:0.75rem; background:rgba(255,255,255,0.1); padding:2px 8px; border-radius:4px;">-</span> | |
| </div> | |
| <div class="card-body"> | |
| <div class="specs-grid"> | |
| <div class="spec-item"><span class="spec-label">Parameters</span><span class="spec-val" id="spec-params">-</span></div> | |
| <div class="spec-item"><span class="spec-label">Hidden Size</span><span class="spec-val" id="spec-hidden">-</span></div> | |
| <div class="spec-item"><span class="spec-label">Layers</span><span class="spec-val" id="spec-layers">-</span></div> | |
| <div class="spec-item"><span class="spec-label">Attn Heads</span><span class="spec-val" id="spec-heads">-</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="card" id="config-panel" style="opacity: 0.5; pointer-events: none;"> | |
| <div class="card-header"> | |
| <span>Inference Configuration</span> | |
| <span id="lock-hint" class="hidden" style="font-size: 0.75rem; font-weight: 400; color: var(--text-dim); margin-left: auto;">🔒 = recalculate with this parameter</span> | |
| </div> | |
| <div class="card-body"> | |
| <div class="form-grid"> | |
| <div class="input-group"> | |
| <div class="label-row"> | |
| <!-- Bound to the #quant select so the control has a programmatic label. --> | |
| <label for="quant">Quantization Method</label> | |
| <span id="quant-optimal" class="optimal-badge hidden">✓ Optimal</span> | |
| </div> | |
| <div class="lockable-input"> | |
| <select id="quant"> | |
| <optgroup label="High Quality"> | |
| <option value="FP16">FP16 (16.0 bpw)</option> | |
| <option value="Q8_0">Q8_0 (8.5 bpw)</option> | |
| <option value="Q6_K">Q6_K (6.59 bpw)</option> | |
| </optgroup> | |
| <optgroup label="Balanced"> | |
| <option value="Q5_K_M">Q5_K_M (5.69 bpw)</option> | |
| <option value="Q5_K_S">Q5_K_S (5.54 bpw)</option> | |
| <option value="Q4_K_M" selected>Q4_K_M (4.85 bpw)</option> | |
| <option value="Q4_K_S">Q4_K_S (4.58 bpw)</option> | |
| <option value="Q4_0">Q4_0 (4.55 bpw)</option> | |
| </optgroup> | |
| <optgroup label="Aggressive"> | |
| <option value="Q3_K_M">Q3_K_M (3.91 bpw)</option> | |
| <option value="Q3_K_S">Q3_K_S (3.5 bpw)</option> | |
| <option value="Q2_K">Q2_K (3.35 bpw)</option> | |
| <option value="IQ3_XXS">IQ3_XXS (3.06 bpw)</option> | |
| <option value="IQ2_XXS">IQ2_XXS (2.06 bpw)</option> | |
| </optgroup> | |
| </select> | |
| <button type="button" class="lock-btn" id="lock-quant" title="Lock: optimizer won't change this setting"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect> | |
| <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path> | |
| </svg> | |
| </button> | |
| </div> | |
| </div> | |
| <div class="input-group"> | |
| <div class="label-row"> | |
| <!-- Bound to the #cache-type select so the control has a programmatic label. --> | |
| <label for="cache-type">KV Cache Precision</label> | |
| <span id="cache-optimal" class="optimal-badge hidden">✓ Optimal</span> | |
| </div> | |
| <div class="lockable-input"> | |
| <select id="cache-type"> | |
| <option value="fp16" selected>FP16 (Standard)</option> | |
| <option value="q8_0">Q8_0 (Compressed)</option> | |
| <option value="q4_0">Q4_0 (Highly Compressed)</option> | |
| </select> | |
| <button type="button" class="lock-btn" id="lock-cache" title="Lock: optimizer won't change this setting"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect> | |
| <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path> | |
| </svg> | |
| </button> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="form-grid"> | |
| <div class="input-group"> | |
| <div class="label-row"> | |
| <!-- Bound to the #context input so the control has a programmatic label. --> | |
| <label for="context">Context Length</label> | |
| <span id="context-optimal" class="optimal-badge hidden">✓ Optimal</span> | |
| </div> | |
| <div class="lockable-input"> | |
| <input type="number" id="context" value="8192" step="1024" min="512"> | |
| <button type="button" class="lock-btn" id="lock-context" title="Lock: optimizer won't change this setting"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect> | |
| <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path> | |
| </svg> | |
| </button> | |
| </div> | |
| </div> | |
| <div class="input-group"> | |
| <!-- Label is tied to the input through for/id. --> | |
| <label for="batch">Batch Size</label> | |
| <input type="number" id="batch" value="512" step="1" min="1"> | |
| </div> | |
| </div> | |
| <div class="input-group"> | |
| <!-- Label is tied to the select through for/id. --> | |
| <label for="framework">Framework</label> | |
| <select id="framework"> | |
| <option value="llama.cpp" selected>llama.cpp (Efficient)</option> | |
| <option value="ExLlamaV2">ExLlamaV2 (Very Efficient)</option> | |
| <option value="vLLM">vLLM (Production)</option> | |
| <option value="transformers">HuggingFace Transformers (Heavy)</option> | |
| </select> | |
| </div> | |
| <div class="form-grid"> | |
| <label class="checkbox-group"> | |
| <input type="checkbox" id="flash-attn" checked> | |
| <span>Flash Attention</span> | |
| </label> | |
| <label class="checkbox-group hidden" id="mmproj-wrapper"> | |
| <input type="checkbox" id="mmproj" checked> | |
| <span>Vision Adapter</span> | |
| </label> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Right: Results --> | |
| <div class="results-column"> | |
| <div class="card sticky-panel"> | |
| <div class="card-header"> | |
| <span>Estimation Results</span> | |
| </div> | |
| <div class="card-body"> | |
| <div style="text-align: center; margin-bottom: 1.5rem;"> | |
| <div style="font-size: 0.8rem; color: var(--text-muted); text-transform: uppercase;">Estimated Usage</div> | |
| <div style="font-size: 2.5rem; font-weight: 700; color: var(--text-main); font-family: 'JetBrains Mono';"> | |
| <span id="total-vram">0.0</span> <span style="font-size: 1rem; color: var(--text-dim);">GB</span> | |
| </div> | |
| </div> | |
| <div class="viz-container"> | |
| <div class="viz-bar-wrapper"> | |
| <div id="limit-line" class="limit-line"></div> | |
| <div class="viz-segment seg-model" id="seg-model" style="width:0%"></div> | |
| <div class="viz-segment seg-kv" id="seg-kv" style="width:0%"></div> | |
| <div class="viz-segment seg-sys" id="seg-sys" style="width:0%"></div> | |
| <div class="viz-segment seg-over" id="seg-over" style="width:0%; display:none;"></div> | |
| </div> | |
| <div class="legend"> | |
| <div class="legend-item"><div class="dot seg-model"></div> Model</div> | |
| <div class="legend-item"><div class="dot seg-kv"></div> Context</div> | |
| <div class="legend-item"><div class="dot seg-sys"></div> Overhead</div> | |
| <div class="legend-item hidden" id="legend-over"><div class="dot seg-over"></div> Overload</div> | |
| </div> | |
| </div> | |
| <div id="rec-container" class="hidden"> | |
| <div id="rec-box" class="recs-box"> | |
| <div class="recs-title" id="rec-title"></div> | |
| <div id="rec-text"></div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="gpu-section"> | |
| <div class="gpu-section-title"> | |
| <span>Compatibility Matrix</span> | |
| <!-- No visible label sits next to this filter, so it gets an explicit accessible name. --> | |
| <select id="gpu-filter" aria-label="Filter GPUs by category"> | |
| <option value="">All GPUs</option> | |
| <option value="consumer">Consumer</option> | |
| <option value="datacenter">Datacenter</option> | |
| </select> | |
| </div> | |
| <div id="gpu-grid" class="gpu-grid"></div> | |
| </div> | |
| </div> | |
| <div class="mobile-footer"> | |
| <div> | |
| <div style="font-size: 0.75rem; color: var(--text-muted);">Total VRAM</div> | |
| <div style="font-weight: 700; color: var(--text-main); font-size: 1.2rem;"> | |
| <span id="mobile-vram-val">0.0</span> GB | |
| </div> | |
| </div> | |
| <button onclick="window.scrollTo({top: 0, behavior: 'smooth'})" class="btn-primary" style="padding: 0.5rem 1rem;">Edit</button> | |
| </div> | |
| <script> | |
| // ============================================ | |
| // CONSTANTS | |
| // ============================================ | |
| const BPW = { | |
| 'FP16': 16, 'Q8_0': 8.5, 'Q6_K': 6.59, 'Q5_K_M': 5.69, 'Q5_K_S': 5.54, | |
| 'Q4_K_M': 4.85, 'Q4_K_S': 4.58, 'Q4_0': 4.55, 'Q3_K_M': 3.91, 'Q3_K_S': 3.5, | |
| 'Q2_K': 3.35, 'IQ3_XXS': 3.06, 'IQ2_XXS': 2.06 | |
| }; | |
| // Priority ordered (highest quality first) | |
| const QUANTS_ORDERED = ['FP16', 'Q8_0', 'Q6_K', 'Q5_K_M', 'Q5_K_S', 'Q4_K_M', 'Q4_K_S', 'Q4_0', 'Q3_K_M', 'Q3_K_S', 'Q2_K', 'IQ3_XXS', 'IQ2_XXS']; | |
| const CACHE_ORDERED = ['fp16', 'q8_0', 'q4_0']; | |
| const CONTEXT_TIERS = [131072, 65536, 32768, 16384, 8192, 4096, 2048]; | |
| const GPUS = [ | |
| { name: 'NVIDIA RTX 4090', vram: 24, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 4080 Super', vram: 16, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 4080', vram: 16, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 4070 Ti Super', vram: 16, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 4070 Ti', vram: 12, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 4060 Ti 16GB', vram: 16, type: 'consumer' }, | |
| { name: 'NVIDIA RTX 3090', vram: 24, type: 'consumer' }, | |
| { name: 'NVIDIA A100 80GB', vram: 80, type: 'datacenter' }, | |
| { name: 'NVIDIA A6000 Ada', vram: 48, type: 'datacenter' }, | |
| { name: 'NVIDIA H100 80GB', vram: 80, type: 'datacenter' }, | |
| { name: 'NVIDIA L40S', vram: 48, type: 'datacenter' }, | |
| { name: 'Mac M3 Max (128GB)', vram: 128, type: 'consumer' }, | |
| { name: 'Mac M3 Max (64GB)', vram: 64, type: 'consumer' }, | |
| { name: 'Mac M3 Pro (36GB)', vram: 36, type: 'consumer' } | |
| ]; | |
| let modelConfig = null; | |
| let selectedGPUVRAM = null; | |
| let optimalConfig = null; | |
| // Lock states | |
| const locks = { | |
| quant: false, | |
| context: false, | |
| cache: false | |
| }; | |
| // ============================================ | |
| // DOM ELEMENTS | |
| // ============================================ | |
| const els = { | |
| loadBtn: document.getElementById('load-btn'), | |
| modelPath: document.getElementById('model-path'), | |
| errorMsg: document.getElementById('error-msg'), | |
| configPanel: document.getElementById('config-panel'), | |
| modelSpecs: document.getElementById('model-specs'), | |
| totalVram: document.getElementById('total-vram'), | |
| mobileVram: document.getElementById('mobile-vram-val'), | |
| segModel: document.getElementById('seg-model'), | |
| segKv: document.getElementById('seg-kv'), | |
| segSys: document.getElementById('seg-sys'), | |
| segOver: document.getElementById('seg-over'), | |
| limitLine: document.getElementById('limit-line'), | |
| legendOver: document.getElementById('legend-over'), | |
| recContainer: document.getElementById('rec-container'), | |
| recBox: document.getElementById('rec-box'), | |
| recTitle: document.getElementById('rec-title'), | |
| recText: document.getElementById('rec-text'), | |
| gpuGrid: document.getElementById('gpu-grid'), | |
| lockHint: document.getElementById('lock-hint'), | |
| specs: { | |
| params: document.getElementById('spec-params'), | |
| hidden: document.getElementById('spec-hidden'), | |
| layers: document.getElementById('spec-layers'), | |
| heads: document.getElementById('spec-heads'), | |
| badge: document.getElementById('spec-arch-badge') | |
| }, | |
| locks: { | |
| quant: document.getElementById('lock-quant'), | |
| context: document.getElementById('lock-context'), | |
| cache: document.getElementById('lock-cache') | |
| }, | |
| optimal: { | |
| quant: document.getElementById('quant-optimal'), | |
| context: document.getElementById('context-optimal'), | |
| cache: document.getElementById('cache-optimal') | |
| } | |
| }; | |
| // ============================================ | |
| // GPU DROPDOWN | |
| // ============================================ | |
| const gpuInput = document.getElementById('selected-gpu-input'); | |
| const gpuDropdown = document.getElementById('gpu-dropdown'); | |
| function renderDropdown(filterText = '') { | |
| gpuDropdown.innerHTML = ''; | |
| const lowerFilter = filterText.toLowerCase(); | |
| const filtered = GPUS.filter(g => g.name.toLowerCase().includes(lowerFilter)); | |
| if (filtered.length === 0) { | |
| const div = document.createElement('div'); | |
| div.className = 'dropdown-item'; | |
| div.textContent = 'No GPUs found'; | |
| div.style.color = 'var(--text-dim)'; | |
| gpuDropdown.appendChild(div); | |
| } else { | |
| filtered.forEach(gpu => { | |
| const div = document.createElement('div'); | |
| div.className = 'dropdown-item'; | |
| div.innerHTML = `<span>${gpu.name}</span><span class="dropdown-meta">${gpu.vram} GB</span>`; | |
| div.onclick = () => { | |
| gpuInput.value = gpu.name; | |
| selectedGPUVRAM = gpu.vram; | |
| gpuDropdown.classList.remove('active'); | |
| onHardwareChange(); | |
| }; | |
| gpuDropdown.appendChild(div); | |
| }); | |
| } | |
| } | |
| gpuInput.addEventListener('focus', () => { renderDropdown(gpuInput.value); gpuDropdown.classList.add('active'); }); | |
| gpuInput.addEventListener('input', (e) => { | |
| renderDropdown(e.target.value); | |
| if(e.target.value === '') { | |
| selectedGPUVRAM = null; | |
| } else { | |
| const match = GPUS.find(g => g.name.toLowerCase() === e.target.value.toLowerCase()); | |
| if(match) selectedGPUVRAM = match.vram; | |
| } | |
| onHardwareChange(); | |
| }); | |
| document.addEventListener('click', (e) => { | |
| if (!gpuInput.contains(e.target) && !gpuDropdown.contains(e.target)) gpuDropdown.classList.remove('active'); | |
| }); | |
| // ============================================ | |
| // LOCK TOGGLES | |
| // ============================================ | |
| Object.keys(els.locks).forEach(key => { | |
| els.locks[key].addEventListener('click', () => { | |
| locks[key] = !locks[key]; | |
| els.locks[key].classList.toggle('locked', locks[key]); | |
| // Update icon to locked/unlocked | |
| if (locks[key]) { | |
| els.locks[key].innerHTML = ` | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect> | |
| <path d="M7 11V7a5 5 0 0 1 10 0v4"></path> | |
| </svg>`; | |
| els.locks[key].title = 'Locked: optimizer will keep this setting'; | |
| } else { | |
| els.locks[key].innerHTML = ` | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect> | |
| <path d="M7 11V7a5 5 0 0 1 5-5 5 5 0 0 1 5 5v1"></path> | |
| </svg>`; | |
| els.locks[key].title = 'Lock: optimizer won\'t change this setting'; | |
| } | |
| onHardwareChange(); | |
| }); | |
| }); | |
| // ============================================ | |
| // EVENT LISTENERS | |
| // ============================================ | |
// Loading a model: via the button or by pressing Enter in the path field.
els.loadBtn.addEventListener('click', loadModel);
els.modelPath.addEventListener('keydown', (event) => {
  if (event.key === 'Enter') loadModel();
});

// Any change to these controls re-runs the VRAM estimate. <input> elements
// additionally recalculate on every 'input' event, not only on commit.
const inputs = ['quant', 'cache-type', 'context', 'batch', 'framework', 'flash-attn', 'mmproj', 'gpu-filter'];
for (const controlId of inputs) {
  const control = document.getElementById(controlId);
  if (!control) continue; // tolerate controls missing from the markup
  control.addEventListener('change', calculate);
  if (control.tagName === 'INPUT') {
    control.addEventListener('input', calculate);
  }
}
| // ============================================ | |
| // MODEL LOADING | |
| // ============================================ | |
/**
 * Loads a model description from the Hugging Face Hub and refreshes the UI.
 *
 * Reads the repo path from `els.modelPath`, fetches its `config.json`
 * (sending the optional token from `#hf-token` as a Bearer header), derives
 * the layer/head/hidden sizes, estimates the parameter count, stores the
 * result in `modelConfig`, and triggers `onHardwareChange()`.
 *
 * Failures are reported through `showError`; the load button is always
 * re-enabled when the attempt finishes.
 */
async function loadModel() {
  const path = els.modelPath.value.trim();
  if (!path) {
    showError('Enter model path');
    return;
  }
  showError('');
  els.loadBtn.disabled = true;
  els.loadBtn.innerHTML = '<div class="spinner"></div>';
  try {
    const token = document.getElementById('hf-token').value.trim();
    const headers = token ? { 'Authorization': `Bearer ${token}` } : {};
    const res = await fetch(`https://huggingface.co/${path}/raw/main/config.json`, { headers });
    if (!res.ok) throw new Error(res.status === 401 ? 'Model is gated. Provide HF Token.' : 'Model not found');

    // Multimodal configs nest the language-model settings under `text_config`
    // while `architectures` stays on the top-level object. Keep both around:
    // sizes come from the text config, the architecture from the root
    // (falling back to the nested one if the root has none).
    const rootConfig = await res.json();
    const config = rootConfig.text_config || rootConfig;
    const architecture = rootConfig.architectures?.[0] || config.architectures?.[0];

    const hidden = config.hidden_size || config.d_model || 4096;
    const layers = config.num_hidden_layers || config.n_layer || 32;
    const heads = config.num_attention_heads || config.n_head || 32;
    const kvHeads = config.num_key_value_heads || heads;

    // Preferred source for the parameter count: the safetensors index, whose
    // total byte size is halved (i.e. treated as 2 bytes per parameter).
    let params = 0;
    try {
      const idx = await fetch(`https://huggingface.co/${path}/resolve/main/model.safetensors.index.json`, { headers });
      if (idx.ok) {
        const data = await idx.json();
        params = data.metadata?.total_size ? (data.metadata.total_size / 2) / 1e9 : 0;
      }
    } catch (e) {
      // Best effort only: fall through to the formula-based estimate below.
    }

    // Fallback: rough estimate from the layer shape plus the embedding table,
    // using the model's own vocabulary size when the config provides one.
    if (!params) {
      const vocab = config.vocab_size || 32000;
      params = ((hidden * hidden * layers * 12) + (hidden * vocab)) / 1e9;
    }

    modelConfig = {
      name: path,
      params: params,
      layers: layers,
      hidden: hidden,
      heads: heads,
      kvHeads: kvHeads,
      isVision: architecture?.toLowerCase().includes('llava') || false,
      arch: architecture || 'Transformer'
    };

    els.configPanel.style.opacity = '1';
    els.configPanel.style.pointerEvents = 'auto';
    els.modelSpecs.classList.remove('hidden');
    els.specs.params.textContent = `${modelConfig.params.toFixed(1)}B`;
    els.specs.hidden.textContent = hidden;
    els.specs.layers.textContent = layers;
    els.specs.heads.textContent = `${heads} / ${kvHeads} KV`;
    els.specs.badge.textContent = modelConfig.arch;
    // The mmproj option is only offered for vision models.
    document.getElementById('mmproj-wrapper').classList.toggle('hidden', !modelConfig.isVision);
    onHardwareChange();
  } catch (err) {
    showError(err.message);
  } finally {
    els.loadBtn.disabled = false;
    els.loadBtn.textContent = 'Load Model';
  }
}
| // ============================================ | |
| // VRAM CALCULATION | |
| // ============================================ | |
/**
 * Estimates memory use for the loaded model under the current (or overridden)
 * settings.
 *
 * @param {Object} [overrides] Optional values that replace what the form
 *   controls currently hold: `quant`, `ctx`, `cache`, `flash`, `fw`.
 * @returns {{total: number, weights: number, kv: number, overhead: number}}
 *   Estimated sizes; the UI presents them as GB.
 *
 * NOTE: the `batch` control / `overrides.batch` is not part of this estimate;
 * the original code read it but never used the value.
 */
function calculateVRAM(overrides = {}) {
  const quant = overrides.quant || document.getElementById('quant').value;
  const cache = overrides.cache || document.getElementById('cache-type').value;
  const flash = overrides.flash !== undefined ? overrides.flash : document.getElementById('flash-attn').checked;
  const fw = overrides.fw || document.getElementById('framework').value;
  const vision = document.getElementById('mmproj').checked && modelConfig.isVision;

  // An empty or malformed context field parses to NaN, which would turn every
  // derived figure into "NaN". Treat anything that is not a positive number
  // as a zero-length context instead.
  const rawCtx = overrides.ctx !== undefined
    ? overrides.ctx
    : parseInt(document.getElementById('context').value, 10);
  const ctx = Number(rawCtx) > 0 ? Number(rawCtx) : 0;

  // Weights: parameter count (in billions) times the per-quant BPW figure, / 8.
  const weights = (modelConfig.params * BPW[quant]) / 8;

  // KV cache: 2 (K and V) x layers x the hidden width scaled down by the
  // attention-head to KV-head ratio, per token.
  const kvRatio = modelConfig.heads / modelConfig.kvHeads;
  const elPerToken = 2 * modelConfig.layers * (modelConfig.hidden / kvRatio);
  const cacheBytes = cache === 'fp16' ? 2 : (cache === 'q8_0' ? 1 : 0.5);
  let kv = (elPerToken * ctx * cacheBytes) / (1024 ** 3);
  if (flash) kv *= 0.65;

  // Overhead: a framework-specific fraction of the weights, plus a fixed
  // allowance when the vision projector is enabled.
  const frameworkOverheadPct = { 'llama.cpp': 0.15, 'ExLlamaV2': 0.14, 'vLLM': 0.12, 'transformers': 0.22 };
  const visionSize = vision ? 0.8 : 0;
  const overhead = (weights * frameworkOverheadPct[fw]) + visionSize;

  const total = weights + kv + overhead;
  return { total, weights, kv, overhead };
}
| // ============================================ | |
| // OPTIMAL CONFIG FINDER | |
| // ============================================ | |
/**
 * Searches for the first quant / cache / context combination whose estimated
 * total fits within `vramLimit`.
 *
 * @param {number} vramLimit Available VRAM to fit into.
 * @param {Object} [constraints] Locked settings (`quant`, `ctx`, `cache`);
 *   a locked setting is the only candidate tried for that dimension.
 * @returns {?{quant: string, ctx: number, cache: string, vram: number, fits: boolean}}
 *   The first fitting combination, or null when nothing fits or no model is
 *   loaded.
 */
function findOptimalConfig(vramLimit, constraints = {}) {
  if (!modelConfig) return null;

  const flash = document.getElementById('flash-attn').checked;

  // A locked dimension collapses to a single candidate; with all three locked
  // the search below degenerates into a plain "does it fit?" check.
  const quantCandidates = constraints.quant ? [constraints.quant] : QUANTS_ORDERED;
  const cacheCandidates = constraints.cache ? [constraints.cache] : CACHE_ORDERED;
  const ctxCandidates = constraints.ctx
    ? [constraints.ctx]
    : CONTEXT_TIERS.filter((tier) => tier <= 131072);

  // Greedy search in priority order quant > cache > context: candidates are
  // tried in list order and the first one that fits wins.
  for (const quant of quantCandidates) {
    for (const cache of cacheCandidates) {
      for (const ctx of ctxCandidates) {
        const { total } = calculateVRAM({ quant, ctx, cache, flash });
        if (total <= vramLimit) {
          return { quant, ctx, cache, vram: total, fits: true };
        }
      }
    }
  }

  return null; // Nothing fits
}
| // ============================================ | |
| // MAIN UPDATE FLOW | |
| // ============================================ | |
/**
 * Recomputes the optimal configuration for the selected GPU and refreshes the
 * estimate. Does nothing until a model has been loaded.
 */
function onHardwareChange() {
  if (!modelConfig) return;

  // The lock hint is only relevant once a GPU has been chosen.
  els.lockHint.classList.toggle('hidden', !selectedGPUVRAM);

  // Locked controls become hard constraints for the optimizer.
  const constraints = {};
  if (locks.quant) {
    constraints.quant = document.getElementById('quant').value;
  }
  if (locks.context) {
    constraints.ctx = parseInt(document.getElementById('context').value);
  }
  if (locks.cache) {
    constraints.cache = document.getElementById('cache-type').value;
  }

  if (!selectedGPUVRAM) {
    optimalConfig = null;
  } else {
    optimalConfig = findOptimalConfig(selectedGPUVRAM, constraints);

    // With nothing locked, the optimal settings are written straight into the form.
    const anyLocked = locks.quant || locks.context || locks.cache;
    if (!anyLocked && optimalConfig) {
      document.getElementById('quant').value = optimalConfig.quant;
      document.getElementById('context').value = optimalConfig.ctx;
      document.getElementById('cache-type').value = optimalConfig.cache;
    }
  }

  calculate();
}
/**
 * Runs the estimate with the current form values and pushes the result to the
 * summary bar and the GPU grid. Does nothing until a model has been loaded.
 */
function calculate() {
  if (!modelConfig) return;
  const { total, weights, kv, overhead } = calculateVRAM();
  updateUI(total, weights, kv, overhead);
  renderGrid(total);
}
/**
 * Updates the headline totals, the segmented usage bar, the optimal badges
 * and the recommendation box.
 *
 * @param {number} total Estimated total memory.
 * @param {number} w Weights portion.
 * @param {number} k KV-cache portion.
 * @param {number} o Overhead portion.
 */
function updateUI(total, w, k, o) {
  const totalLabel = total.toFixed(1);
  els.totalVram.textContent = totalLabel;
  els.mobileVram.textContent = totalLabel;

  // Without a selected GPU the bar is scaled to the estimate itself.
  const limit = selectedGPUVRAM || total;
  const isOver = total > limit && selectedGPUVRAM !== null;
  const barScale = isOver ? total : limit;
  const share = (part) => `${(part / barScale) * 100}%`;

  els.segModel.style.width = share(w);
  els.segKv.style.width = share(k);
  els.segSys.style.width = share(o);

  // Update optimal badges
  updateOptimalBadges();

  if (!isOver) {
    els.segOver.style.display = 'none';
    els.limitLine.style.display = 'none';
    els.legendOver.classList.add('hidden');

    if (selectedGPUVRAM) {
      showFitRecommendation(total, limit);
    } else {
      els.recContainer.classList.add('hidden');
    }
    return;
  }

  // Over the limit: overlay the overflow segment from the limit mark to the
  // right edge and draw the limit line at the same position.
  const limitPos = (limit / total) * 100;
  const overStyle = els.segOver.style;
  overStyle.display = 'block';
  overStyle.position = 'absolute';
  overStyle.left = `${limitPos}%`;
  overStyle.right = '0';
  overStyle.width = 'auto';

  els.limitLine.style.display = 'block';
  els.limitLine.style.left = `${limitPos}%`;
  els.legendOver.classList.remove('hidden');

  showOverflowRecommendation(total, limit);
}
/**
 * Shows the "optimal" badge next to each control whose current value matches
 * `optimalConfig`; hides all three when there is no optimal config or no GPU.
 */
function updateOptimalBadges() {
  const badges = els.optimal;

  if (!optimalConfig || !selectedGPUVRAM) {
    [badges.quant, badges.context, badges.cache].forEach((badge) => {
      badge.classList.add('hidden');
    });
    return;
  }

  const current = {
    quant: document.getElementById('quant').value,
    ctx: parseInt(document.getElementById('context').value),
    cache: document.getElementById('cache-type').value
  };

  badges.quant.classList.toggle('hidden', current.quant !== optimalConfig.quant);
  badges.context.classList.toggle('hidden', current.ctx !== optimalConfig.ctx);
  badges.cache.classList.toggle('hidden', current.cache !== optimalConfig.cache);
}
| // ============================================ | |
| // RECOMMENDATIONS | |
| // ============================================ | |
/**
 * Fills the recommendation box for a configuration that exceeds the GPU limit.
 * Lists the changes needed to reach `optimalConfig`, or explains that nothing
 * fits (naming the locked settings, if any).
 *
 * @param {number} currentTotal Estimated total for the current settings.
 * @param {number} limit VRAM of the selected GPU.
 */
function showOverflowRecommendation(currentTotal, limit) {
  els.recContainer.classList.remove('hidden');
  els.recBox.className = 'recs-box danger';

  const overBy = currentTotal - limit;
  els.recTitle.innerHTML = `<span style="color:var(--danger)">⚠️ Over VRAM Limit by ${overBy.toFixed(1)} GB</span>`;

  if (!optimalConfig) {
    // Nothing fits even with optimization; point at the locks if any are set.
    const lockedSettings = [];
    if (locks.quant) lockedSettings.push('quantization');
    if (locks.context) lockedSettings.push('context length');
    if (locks.cache) lockedSettings.push('cache precision');

    const reason = lockedSettings.length > 0
      ? ` with locked ${lockedSettings.join(', ')}. Try unlocking some settings.`
      : ' even with maximum optimization.';

    els.recText.innerHTML = '<div style="color:var(--text-muted);">This model cannot fit on your GPU' + reason + '</div>';
    return;
  }

  const currentQuant = document.getElementById('quant').value;
  const currentCtx = parseInt(document.getElementById('context').value);
  const currentCache = document.getElementById('cache-type').value;

  // One step per setting that differs from the optimal configuration.
  const steps = [];
  if (optimalConfig.quant !== currentQuant) {
    steps.push(`<div class="rec-step"><span class="rec-tag tag-quant">QUANT</span> Switch to <b>${optimalConfig.quant}</b></div>`);
  }
  if (optimalConfig.ctx !== currentCtx) {
    const verb = optimalConfig.ctx < currentCtx ? 'Reduce' : 'Increase';
    steps.push(`<div class="rec-step"><span class="rec-tag tag-ctx">CONTEXT</span> ${verb} to <b>${optimalConfig.ctx.toLocaleString()}</b></div>`);
  }
  if (optimalConfig.cache !== currentCache) {
    steps.push(`<div class="rec-step"><span class="rec-tag tag-cache">CACHE</span> Switch to <b>${optimalConfig.cache.toUpperCase()}</b></div>`);
  }

  let html = '<div class="rec-solution">Suggested changes to fit:</div>' + steps.join('');
  if (steps.length > 0) {
    html += `<div style="margin-top:0.75rem; color:var(--text-muted); font-size:0.85rem;">This would use ${optimalConfig.vram.toFixed(1)} GB (${(limit - optimalConfig.vram).toFixed(1)} GB free)</div>`;
  }
  els.recText.innerHTML = html;
}
/**
 * Fills the recommendation box for a configuration that fits the GPU limit:
 * confirms an optimal setup, lists the differences from `optimalConfig`, or
 * simply reports the remaining headroom when no optimal config is available.
 *
 * @param {number} currentTotal Estimated total for the current settings.
 * @param {number} limit VRAM of the selected GPU.
 */
function showFitRecommendation(currentTotal, limit) {
  els.recContainer.classList.remove('hidden');

  const headroom = limit - currentTotal;
  const currentQuant = document.getElementById('quant').value;
  const currentCtx = parseInt(document.getElementById('context').value);
  const currentCache = document.getElementById('cache-type').value;

  // No optimal configuration to compare against: just report the headroom.
  if (!optimalConfig) {
    els.recBox.className = 'recs-box';
    els.recTitle.innerHTML = `<span style="color:var(--success)">✅ Configuration Valid</span>`;
    els.recText.innerHTML = `<div style="color:var(--text-muted);">${headroom.toFixed(1)} GB headroom remaining.</div>`;
    return;
  }

  const quantMatches = currentQuant === optimalConfig.quant;
  const ctxMatches = currentCtx === optimalConfig.ctx;
  const cacheMatches = currentCache === optimalConfig.cache;

  // Already at the optimal settings.
  if (quantMatches && ctxMatches && cacheMatches) {
    els.recBox.className = 'recs-box';
    els.recTitle.innerHTML = `<span style="color:var(--success)">✅ Optimal Configuration</span>`;
    els.recText.innerHTML = headroom < 1.0
      ? '<div style="color:var(--text-muted);">Fit is tight. Consider closing other GPU applications.</div>'
      : `<div style="color:var(--text-muted);">Best settings for your hardware. ${headroom.toFixed(1)} GB headroom.</div>`;
    return;
  }

  // Fits, but differs from the optimal settings: show the delta.
  els.recBox.className = 'recs-box warning';
  els.recTitle.innerHTML = `<span style="color:var(--warning)">💡 Better Configuration Available</span>`;

  const parts = ['<div class="rec-solution">For optimal performance:</div>'];
  if (!quantMatches) {
    // An earlier position in QUANTS_ORDERED is reported as an upgrade.
    const currentRank = QUANTS_ORDERED.indexOf(currentQuant);
    const optimalRank = QUANTS_ORDERED.indexOf(optimalConfig.quant);
    const verb = optimalRank < currentRank ? 'Upgrade' : 'Downgrade';
    parts.push(`<div class="rec-step"><span class="rec-tag tag-quant">QUANT</span> ${verb} to <b>${optimalConfig.quant}</b></div>`);
  }
  if (!ctxMatches) {
    const verb = optimalConfig.ctx > currentCtx ? 'Increase' : 'Reduce';
    parts.push(`<div class="rec-step"><span class="rec-tag tag-ctx">CONTEXT</span> ${verb} to <b>${optimalConfig.ctx.toLocaleString()}</b></div>`);
  }
  if (!cacheMatches) {
    parts.push(`<div class="rec-step"><span class="rec-tag tag-cache">CACHE</span> Switch to <b>${optimalConfig.cache.toUpperCase()}</b></div>`);
  }
  els.recText.innerHTML = parts.join('');
}
| // ============================================ | |
| // GPU GRID | |
| // ============================================ | |
/**
 * Renders one card per GPU showing how the required memory compares to that
 * GPU's VRAM.
 *
 * @param {number} req Estimated memory requirement to compare against.
 *
 * The markup for all cards is built first and assigned to the grid in a
 * single `innerHTML` write; appending with `+=` per card would re-serialise
 * and re-parse the whole grid on every iteration.
 */
function renderGrid(req) {
  const filter = document.getElementById('gpu-filter').value;
  // An empty filter value means "show every GPU".
  const list = filter ? GPUS.filter(g => g.type === filter) : GPUS;

  els.gpuGrid.innerHTML = list.map(gpu => {
    const percent = (req / gpu.vram) * 100;
    const isSafe = req <= gpu.vram;
    // "Tight" = fits, but uses more than 90% of the card.
    const isTight = isSafe && req > (gpu.vram * 0.9);
    const color = isSafe ? (isTight ? 'var(--warning)' : 'var(--success)') : 'var(--danger)';
    const verdict = isSafe ? (isTight ? 'Tight Fit' : 'Comfortable') : 'Insufficient';

    return `
<div class="gpu-card" style="${!isSafe ? 'opacity:0.8;' : ''}">
<div class="gpu-top">
<span class="gpu-name">${gpu.name}</span>
<span class="gpu-vram">${gpu.vram} GB</span>
</div>
<div class="gpu-bar-bg">
<div class="gpu-bar-fill" style="width:${Math.min(percent, 100)}%; background:${color}"></div>
</div>
<div style="display:flex; justify-content:space-between; font-size:0.8rem;">
<span style="color:${color}; font-weight:500">${verdict}</span>
<span style="color:var(--text-dim)">${percent.toFixed(0)}% usage</span>
</div>
</div>
`;
  }).join('');
}
/**
 * Writes `msg` into the error message element as plain text.
 * `loadModel` passes an empty string to clear a previous error.
 */
function showError(msg) {
  const target = els.errorMsg;
  target.textContent = msg;
}
| </script> | |
| </div> | |
| </body> | |
| </html> | |