| | <!DOCTYPE html> |
| | <html lang="en"> |
| | <head> |
| | <meta charset="UTF-8" /> |
| | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| | <script> |
/**
 * Parse a string of HTML markup into a document.
 *
 * @param {string} str - Markup to parse.
 * @returns {Document} Whatever `DOMParser.parseFromString` yields for the
 *   "text/html" MIME type.
 */
function strToHtml(str) {
  const htmlParser = new DOMParser();
  const parsedDocument = htmlParser.parseFromString(str, "text/html");
  return parsedDocument;
}
| | |
| | |
| | |
/**
 * Convert an HTML table into an array of plain objects.
 *
 * The first row supplies the property names; every following row becomes one
 * object whose values are the trimmed text of the cells in the matching
 * columns.
 *
 * @param {?HTMLTableElement} table - Table to convert. `null`/`undefined`
 *   (for example when `querySelector('table')` found nothing) is accepted.
 * @returns {Array<Object<string, string>>} One object per data row; an empty
 *   array when there is no table or the table has no rows.
 */
function tableToObj(table) {
  // A missing or empty table simply has no records.
  if (!table || !table.rows || table.rows.length === 0) {
    return [];
  }

  // A missing cell (row shorter than the header) reads as an empty string
  // instead of throwing.
  const cellText = (cell) =>
    (cell ? cell.textContent || cell.innerText || "" : "").trim();

  const [headerRow, ...dataRows] = Array.from(table.rows);
  const propNames = Array.from(headerRow.cells, cellText);

  return dataRows.map((row) => {
    const record = {};
    propNames.forEach((propName, column) => {
      record[propName] = cellText(row.cells[column]);
    });
    return record;
  });
}
| | |
/**
 * Build the display labels shown in the GPU suggestion list.
 *
 * @param {Array<Object<string, string>>} gpus - Records carrying at least the
 *   "Product Name" and "Memory" properties.
 * @returns {string[]} One "<product name> - <memory>" label per record, where
 *   <memory> is the part of the "Memory" text before its first comma.
 */
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    const productName = gpu["Product Name"];
    const memoryAmount = gpu["Memory"].split(",")[0];
    return `${productName} - ${memoryAmount}`;
  });
}
| | |
// Lookup from GGUF quantization name to the bits-per-weight figure that
// calculateSizes passes on as `bpw` when the GGUF format is selected.
const gguf_quants = {
  Q2_K: 3.35,
  Q3_K_S: 3.5,
  Q3_K_M: 3.91,
  Q3_K_L: 4.27,
  Q4_0: 4.55,
  Q4_K_S: 4.58,
  Q4_K_M: 4.85,
  Q5_0: 5.54,
  Q5_K_S: 5.54,
  Q5_K_M: 5.69,
  Q6_K: 6.59,
  Q8_0: 8.5,
};
| | |
/**
 * Download a model's `config.json` from Hugging Face and attach its parameter
 * count as `config.parameters`.
 *
 * The parameter count is looked up in three places, in order, falling through
 * to the next one whenever a source is missing or carries no usable number:
 *   1. `model.safetensors.index.json`  -> `metadata.total_size / 2`
 *   2. `pytorch_model.bin.index.json`  -> `metadata.total_size / 2`
 *   3. the model's web page (fetched through corsproxy.io), reading the
 *      `data-props` JSON of the `ModelSafetensorsParams` or `ModelHeader` div.
 *
 * NOTE(review): dividing `total_size` by 2 presumably assumes the index
 * reports a byte count for 16-bit weights — confirm against the index format.
 *
 * @param {string} hf_model - Repository id, e.g. "mistralai/Mistral-7B-v0.1".
 * @returns {Promise<Object>} The parsed config with an added `parameters`
 *   property.
 * @throws {Error} When `config.json` cannot be fetched, or when none of the
 *   three sources yields a parameter count.
 */
async function modelConfig(hf_model) {
  const repoUrl = `https://huggingface.co/${hf_model}`;

  // Fetch a URL and decode it as JSON, turning HTTP errors into exceptions
  // instead of attempting to parse an error page.
  const fetchJson = async (url) => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Request for ${url} failed with HTTP ${response.status}`);
    }
    return response.json();
  };

  // Read the parameter count out of a weight-index file in the repository.
  const parametersFromIndex = async (fileName) => {
    const index = await fetchJson(`${repoUrl}/resolve/main/${fileName}`);
    const metadata = index && index.metadata ? index.metadata : {};
    const parameters = metadata.total_size / 2;
    if (!Number.isFinite(parameters)) {
      throw new Error(`no size in ${fileName} metadata`);
    }
    return parameters;
  };

  // Last resort: scrape the parameter count from the model's web page.
  const parametersFromModelPage = async () => {
    const response = await fetch(
      "https://corsproxy.io/?" + encodeURIComponent(repoUrl)
    );
    const pageHtml = await response.text();
    // DOMParser builds a separate document, so the fetched markup is never
    // attached to an element created by the live page.
    const page = new DOMParser().parseFromString(pageHtml, "text/html");

    const readProps = (selector) => {
      const element = page.querySelector(selector);
      return element === null ? null : JSON.parse(element.getAttribute("data-props"));
    };

    const paramsProps = readProps('div[data-target="ModelSafetensorsParams"]');
    if (paramsProps && paramsProps.safetensors) {
      const total = paramsProps.safetensors.total;
      if (Number.isFinite(total)) {
        return total;
      }
    }

    const headerProps = readProps('div[data-target="ModelHeader"]');
    if (headerProps && headerProps.model && headerProps.model.safetensors) {
      const total = headerProps.model.safetensors.total;
      if (Number.isFinite(total)) {
        return total;
      }
    }

    throw new Error(`Could not determine the parameter count of ${hf_model}`);
  };

  const config = await fetchJson(`${repoUrl}/raw/main/config.json`);

  let parameters;
  try {
    parameters = await parametersFromIndex("model.safetensors.index.json");
  } catch (safetensorsError) {
    // Deliberate fallback: not every repository ships a safetensors index.
    try {
      parameters = await parametersFromIndex("pytorch_model.bin.index.json");
    } catch (pytorchError) {
      // Deliberate fallback: single-file checkpoints have no index at all.
      parameters = await parametersFromModelPage();
    }
  }

  config.parameters = parameters;
  return config;
}
| | |
/**
 * Sum the sizes of the input buffers for one batch.
 *
 * NOTE(review): the result is added to byte counts by contextSize although it
 * is computed as a plain element count — confirm the intended unit.
 *
 * @param {number} [context=8192] - Context length.
 * @param {Object} model_config - Model config; only `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} Combined size of all six input buffers.
 */
function inputBuffer(context=8192, model_config, bsz=512) {
  const { hidden_size: hiddenSize } = model_config;

  const bufferSizes = [
    bsz,              // inp_tokens
    hiddenSize * bsz, // inp_embd
    bsz,              // inp_pos
    context * bsz,    // inp_KQ_mask
    context,          // inp_K_shift
    bsz,              // inp_sum
  ];

  // No seed value: the additions then run in exactly the listed order.
  return bufferSizes.reduce((total, size) => total + size);
}
| | |
/**
 * Estimate the compute buffer size.
 *
 * The formula does not depend on the batch size. When a batch size other than
 * 512 is passed, the user is warned through `alert` and the same value is
 * still returned.
 *
 * @param {number} [context=8192] - Context length.
 * @param {Object} model_config - Model config; only `num_attention_heads` is
 *   read.
 * @param {number} [bsz=512] - Batch size; used only to decide whether to warn.
 * @returns {number} `(context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024`.
 */
function computeBuffer(context=8192, model_config, bsz=512) {
  if (Number(bsz) !== 512) {
    alert("Batch size other than 512 is currently not supported for the compute buffer; using batch size 512 for the compute buffer calculation, so the end result will be an overestimation.");
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
| | |
/**
 * Estimate the size of the key/value cache in bytes.
 *
 * @param {number} [context=8192] - Context length.
 * @param {Object} model_config - Model config; reads `num_attention_heads`,
 *   `num_key_value_heads` (optional), `hidden_size` and `num_hidden_layers`.
 * @param {number} [cache_bit=16] - Bits stored per cache element.
 * @returns {number} `2 * (hidden_size / n_gqa) * num_hidden_layers * context`
 *   elements, multiplied by `cache_bit / 8` bytes each.
 */
function kvCache(context=8192, model_config, cache_bit=16) {
  const attentionHeads = model_config["num_attention_heads"];
  // Configs without grouped-query attention omit `num_key_value_heads`.
  // Falling back to the attention head count keeps the result from turning
  // into NaN for those models.
  const configuredKvHeads = model_config["num_key_value_heads"];
  const kvHeads = configuredKvHeads == null ? attentionHeads : configuredKvHeads;

  const n_gqa = attentionHeads / kvHeads;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  // Factor 2: one set of elements for keys, one for values.
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
| | |
/**
 * Total size attributed to the context: input buffers + KV cache + compute
 * buffer, rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length.
 * @param {Object} model_config - Model config forwarded to the three helpers.
 * @param {number} [bsz=512] - Batch size.
 * @param {number} [cache_bit=16] - Bits per KV-cache element.
 * @returns {number} The rounded sum of the three partial sizes.
 */
function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
  const inputPart = inputBuffer(context, model_config, bsz);
  const cachePart = kvCache(context, model_config, cache_bit);
  const computePart = computeBuffer(context, model_config, bsz);

  const combined = inputPart + cachePart + computePart;
  return Number.parseFloat(combined.toFixed(2));
}
| | |
/**
 * Size of the quantized weights: parameter count times bits per weight,
 * converted from bits to bytes and rounded to two decimal places.
 *
 * @param {Object} model_config - Model config; only `parameters` is read.
 * @param {number} [bpw=4.5] - Bits per weight.
 * @returns {number} `parameters * bpw / 8`, rounded to two decimals.
 */
function modelSize(model_config, bpw=4.5) {
  const sizeInBytes = model_config["parameters"] * bpw / 8;
  const roundedText = sizeInBytes.toFixed(2);
  return Number.parseFloat(roundedText);
}
| | |
/**
 * Read the form, estimate the memory needed for the selected model and write
 * the results (in GiB, two decimals) into the result fields.
 *
 * Reads:  #modelsearch, #contextsize, #gpusearch, and depending on `format`
 *         #batchsize + #quantsize (gguf) or #kvCache + #bpw (exl2).
 * Writes: #resultmodel, #resultcontext, #resulttotal, plus the background
 *         colour of #resulttotal when a GPU is selected:
 *           green  - more than 0.5 GB of VRAM left over
 *           yellow - fits, with at most 0.5 GB left over
 *           red    - does not fit
 *         The colour is cleared when no GPU is given or its memory cannot be
 *         read from the label.
 *
 * Every failure (invalid input, network error, unknown model) is reported to
 * the user through `alert`; the returned promise never rejects.
 *
 * @param {string} format - Quantization format, "gguf" or "exl2".
 * @returns {Promise<void>}
 */
async function calculateSizes(format) {
  const BYTES_PER_GIB = 2 ** 30;

  // Extract the memory size in GB from a label such as
  // "GeForce RTX 3090 - 24 GB". The text after the LAST hyphen is used so
  // product names that contain hyphens themselves are handled, and sizes given
  // in MB are converted. Returns NaN when no size can be found.
  const parseVramGb = (gpuLabel) => {
    const separatorIndex = gpuLabel.lastIndexOf("-");
    if (separatorIndex === -1) {
      return NaN;
    }
    const match = /([\d.]+)\s*(MB|GB)?/i.exec(gpuLabel.slice(separatorIndex + 1));
    if (match === null) {
      return NaN;
    }
    const amount = Number.parseFloat(match[1]);
    const isMegabytes = match[2] !== undefined && match[2].toUpperCase() === "MB";
    return isMegabytes ? amount / 1024 : amount;
  };

  try {
    // Validate the form before going to the network.
    const context = Number.parseInt(document.getElementById("contextsize").value, 10);
    if (!(context > 0)) {
      throw new Error("Context size must be a positive number");
    }

    let bsz = 512;
    let cache_bit = 16;
    let bpw;
    if (format === "gguf") {
      bsz = Number.parseInt(document.getElementById("batchsize").value, 10);
      if (!(bsz > 0)) {
        throw new Error("Batch size must be a positive number");
      }
      const quant = document.getElementById("quantsize").innerText.trim();
      bpw = gguf_quants[quant];
      if (typeof bpw !== "number") {
        throw new Error(`Unknown GGUF quantization "${quant}"`);
      }
    } else if (format === "exl2") {
      cache_bit = Number.parseInt(document.getElementById("kvCache").value, 10);
      bpw = Number.parseFloat(document.getElementById("bpw").value);
      if (!(bpw > 0)) {
        throw new Error("BPW must be a positive number");
      }
    } else {
      throw new Error(`Unsupported quant format "${format}"`);
    }

    const model_config = await modelConfig(document.getElementById("modelsearch").value);

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / BYTES_PER_GIB;

    document.getElementById("resultmodel").innerText = (model_size / BYTES_PER_GIB).toFixed(2);
    document.getElementById("resultcontext").innerText = (context_size / BYTES_PER_GIB).toFixed(2);
    const result_total_el = document.getElementById("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    const gpu = document.getElementById("gpusearch").value;
    const vram = gpu === "" ? NaN : parseVramGb(gpu);
    if (Number.isNaN(vram)) {
      // No (readable) GPU: remove any colour left over from an earlier run.
      result_total_el.style.backgroundColor = "";
    } else if (vram - total_size > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (vram - total_size > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }
  } catch (e) {
    alert(e);
  }
}
| | </script> |
| | <link href="./styles.css" rel="stylesheet"> |
| | <title>Can I run it? - LLM VRAM Calculator</title> |
| | </head> |
| | <body class="p-8"> |
| | <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10"> |
| | <h1 class="text-xl font-semibold leading-6 text-gray-900"> |
| | LLM Model, Can I run it? |
| | </h1> |
| | <div class="flex flex-col gap-10"> |
| | <div class="w-auto flex flex-col gap-4"> |
| | |
<!--
  Optional GPU picker. Typing queries the TechPowerUp GPU search through
  corsproxy.io and fills the datalist with "<product name> - <memory>" labels.
  The search term and the proxied URL are both URL-encoded, a response without
  a table yields an empty list, and picking one of the current suggestions
  does not trigger another lookup.
-->
<div
  class="relative"
  x-data="{
    results: [],
    query: '',
    async search() {
      const term = (this.query || '').trim()
      if (term === '') {
        this.results = []
        return
      }
      if (this.results.includes(term)) {
        return
      }
      const target = 'https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + encodeURIComponent(term)
      const page = await fetch('https://corsproxy.io/?' + encodeURIComponent(target)).then(r => r.text())
      const table = strToHtml(page).querySelector('table')
      this.results = table ? formatGpu(tableToObj(table)) : []
    }
  }"
>
  <label
    for="gpusearch"
    class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
    >GPU (optional)</label
  >
  <input
    class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
    placeholder="GeForce RTX 3090 - 24 GB"
    id="gpusearch"
    name="gpusearch"
    list="gpulist"
    x-model="query"
    @input.debounce.150ms="search()"
  />
  <datalist id="gpulist">
    <template x-for="item in results">
      <option :value="item" x-text="item"></option>
    </template>
  </datalist>
</div>
| | |
| |
|
| |
|
<!--
  Model picker: a text input with a type-ahead panel fed by the Hugging Face
  quick-search API. Repositories whose id contains GGUF, AWQ, GPTQ or exl2 are
  filtered out of the suggestions.
-->
<div class="flex flex-row gap-4 relative">
  <label
    for="modelsearch"
    class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
  >
    Model (unquantized)
  </label>
  <div
    class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
    x-data="{
      open: false,
      value: 'mistralai/Mistral-7B-v0.1',
      results: [],
      toggle() {
        if (this.open) {
          return this.close()
        }

        this.$refs.input.focus()

        this.open = true
      },
      close(focusAfter) {
        if (! this.open) return

        this.open = false

        focusAfter && focusAfter.focus()
      },
      async search() {
        const excluded = ['GGUF', 'AWQ', 'GPTQ', 'exl2']
        const reply = await fetch('https://huggingface.co/api/quicksearch?type=model&q=' + encodeURIComponent(this.value)).then(r => r.json())
        const models = reply && reply.models ? reply.models : []
        this.results = models.filter(m => !excluded.some(tag => m.id.includes(tag)))
      }
    }"
    x-on:keydown.escape.prevent.stop="close($refs.input)"
    x-id="['model-typeahead']"
  >
    <input
      id="modelsearch"
      x-ref="input"
      x-on:click="toggle()"
      @input.debounce.150ms="search()"
      :aria-expanded="open"
      :aria-controls="$id('model-typeahead')"
      x-model="value"
      class="flex justify-between items-center gap-2 w-full"
    />

    <div
      x-ref="panel"
      x-show="open"
      x-transition.origin.top.left
      x-on:click.outside="close($refs.input)"
      :id="$id('model-typeahead')"
      style="display: none"
      class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
    >
      <template x-for="result in results">
        <a
          @click="value = result.id; close($refs.input)"
          x-text="result.id"
          class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
        ></a>
      </template>
    </div>
  </div>
</div>
| |
|
| |
|
| | |
| | <!-- Context size input; calculateSizes reads it through the id "contextsize". --> |
| | <div class="relative"> |
| | <label |
| | for="contextsize" |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | > |
| | Context Size |
| | </label> |
| | <input |
| | value="8192" |
| | type="number" |
| | name="contextsize" |
| | id="contextsize" |
| | step="1024" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | /> |
| | </div> |
| | |
| | <!-- Quant format radio group. The fieldset is bound to the `format` state declared on the page wrapper; that value switches between the GGUF and EXL2 option panels and is passed to calculateSizes by the Submit button. --> |
| | <div class="relative"> |
| | <label |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | >Quant Format</label |
| | > |
| | <fieldset |
| | x-model="format" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | > |
| | <legend class="sr-only">Quant format</legend> |
| | <div |
| | class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0" |
| | > |
| | <!-- GGUF: selected by default. --> |
| | <div class="flex items-center"> |
| | <input |
| | id="gguf-format" |
| | name="quant-format" |
| | type="radio" |
| | value="gguf" |
| | checked |
| | class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| | /> |
| | <label |
| | for="gguf-format" |
| | class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| | >GGUF</label |
| | > |
| | </div> |
| | <!-- EXL2. --> |
| | <div class="flex items-center"> |
| | <input |
| | id="exl2-format" |
| | name="quant-format" |
| | type="radio" |
| | value="exl2" |
| | class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| | /> |
| | <label |
| | for="exl2-format" |
| | class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| | >EXL2</label |
| | > |
| | </div> |
| | <!-- GPTQ: disabled, so it cannot be selected. --> |
| | <div class="flex items-center"> |
| | <input |
| | id="gptq-format" |
| | name="quant-format" |
| | type="radio" |
| | disabled |
| | value="gptq" |
| | class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| | /> |
| | <label |
| | for="gptq-format" |
| | class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| | >GPTQ (coming soon)</label |
| | > |
| | </div> |
| | </div> |
| | </fieldset> |
| | </div> |
| | |
| | <!-- EXL2-only options, shown while format is 'exl2'. calculateSizes reads the inputs through the ids "bpw" and "kvCache". --> |
| | <div x-show="format === 'exl2'" class="flex flex-row gap-4"> |
| | <div class="relative flex-grow"> |
| | <label |
| | for="bpw" |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | > |
| | BPW |
| | </label> |
| | <input |
| | value="4.5" |
| | type="number" |
| | step="0.01" |
| | id="bpw" |
| | name="bpw" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | /> |
| | </div> |
| | <div |
| | class="flex-shrink relative rounded-md" |
| | > |
| | <div |
| | class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | > |
| | <label |
| | for="kvCache" |
| | class="inline-block bg-white text-xs font-medium text-gray-900" |
| | > |
| | KV Cache |
| | </label> |
| | <!-- The option values are parsed as the number of bits per cache element. --> |
| | <select id="kvCache" name="kvCache"> |
| | <option value="16">16 bit</option> |
| | <option value="8">8 bit</option> |
| | <option value="4">4 bit</option> |
| | </select> |
| | </div> |
| | </div> |
| | </div> |
| | |
<!--
  GGUF-only options, shown while format is 'gguf': a quantization dropdown and
  the batch size. calculateSizes reads the chosen quantization from the text of
  the #quantsize button and looks it up in gguf_quants, so every entry offered
  here must be a key of that table.
-->
<div x-show="format === 'gguf'" class="relative">
  <div class="flex flex-row gap-4">
    <label
      for="quantsize"
      class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
    >
      Quantization Size
    </label>
    <div
      class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
      x-data="{
        open: false,
        value: '',
        toggle() {
          if (this.open) {
            return this.close()
          }

          this.$refs.button.focus()

          this.open = true
        },
        close(focusAfter) {
          if (! this.open) return

          this.open = false

          focusAfter && focusAfter.focus()
        }
      }"
      x-on:keydown.escape.prevent.stop="close($refs.button)"
      x-id="['dropdown-button']"
    >
      <!-- Shows the chosen quantization; Q4_K_S until the user picks one. -->
      <button
        x-ref="button"
        x-on:click="toggle()"
        :aria-expanded="open"
        :aria-controls="$id('dropdown-button')"
        type="button"
        id="quantsize"
        x-text="value.length === 0 ? 'Q4_K_S' : value"
        class="flex justify-between items-center gap-2 w-full"
      >
        Q4_K_S

        <svg
          xmlns="http://www.w3.org/2000/svg"
          class="h-5 w-5 text-gray-400"
          viewBox="0 0 20 20"
          fill="currentColor"
        >
          <path
            fill-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
            clip-rule="evenodd"
          />
        </svg>
      </button>

      <!-- Dropdown panel listing every quantization known to gguf_quants. -->
      <div
        x-data="{ quants: [
          'Q2_K',
          'Q3_K_S',
          'Q3_K_M',
          'Q3_K_L',
          'Q4_0',
          'Q4_K_S',
          'Q4_K_M',
          'Q5_0',
          'Q5_K_S',
          'Q5_K_M',
          'Q6_K',
          'Q8_0'
        ]}"
        x-ref="panel"
        x-show="open"
        x-transition.origin.top.left
        x-on:click.outside="close($refs.button)"
        :id="$id('dropdown-button')"
        style="display: none"
        class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
      >
        <template x-for="quant in quants">
          <a
            @click="value = quant; close($refs.button)"
            x-text="quant"
            class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
          ></a>
        </template>
      </div>
    </div>
    <div class="relative">
      <label
        for="batchsize"
        class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
      >
        Batch Size
      </label>
      <input
        value="512"
        type="number"
        step="128"
        id="batchsize"
        class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
      />
    </div>
  </div>
</div>
| | <!-- Runs calculateSizes with the currently selected quant format. --> |
| | <button |
| | type="button" |
| | class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600" |
| | @click="calculateSizes(format)" |
| | > |
| | Submit |
| | </button> |
| | </div> |
| | <!-- Result fields. The numbers below are placeholders: calculateSizes overwrites the text of #resultmodel, #resultcontext and #resulttotal, and sets the background colour of #resulttotal when a GPU is selected. --> |
| | <div class="w-auto flex flex-col gap-4"> |
| | <div class="relative"> |
| | <label |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | > |
| | Model Size (GB) |
| | </label> |
| | <div |
| | id="resultmodel" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | >4.20</div> |
| | </div> |
| | <div class="relative"> |
| | <label |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | > |
| | Context Size (GB) |
| | </label> |
| | <div |
| | id="resultcontext" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | >6.90</div> |
| | </div> |
| | <div class="relative"> |
| | <label |
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| | > |
| | Total Size (GB) |
| | </label> |
| | <div |
| | id="resulttotal" |
| | class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| | >420.69</div> |
| | </div> |
| | </div> |
| | </div> |
| | </div> |
<script
  src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<!--
  Initial calculation on page load. The `defer` attribute was removed because
  it has no effect on a script without `src`; this block runs as soon as the
  parser reaches it, exactly as before.
  NOTE(review): this presumably relies on the Alpine script above having
  already filled in the default model name. Verify before reordering the two
  script tags or adding `defer` to the Alpine one.
-->
<script>
  calculateSizes("gguf")
</script>
| | </body> |
| | </html> |
| |
|