| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| <script> |
/**
 * Parses a string of HTML markup into a standalone document.
 *
 * @param {string} str - HTML markup to parse.
 * @returns {Document} Whatever `DOMParser.parseFromString` produces for the
 *   "text/html" MIME type.
 */
function strToHtml(str) {
  const parser = new DOMParser();
  return parser.parseFromString(str, "text/html");
}
| |
| |
| |
/**
 * Converts a table into an array of plain objects, one per body row.
 *
 * The first row supplies the property names (trimmed cell text); every
 * following row becomes an object mapping those names to the trimmed text of
 * the cell in the same column.
 *
 * @param {?{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - Table-like
 *   object exposing `rows`, each with `cells` (e.g. an HTMLTableElement).
 *   `null`/`undefined` is accepted so a failed `querySelector('table')` can be
 *   passed straight through.
 * @returns {Object<string, string>[]} One object per row after the header;
 *   an empty array when there is no table or no rows.
 */
function tableToObj(table) {
  const rows = Array.from(table?.rows ?? []);
  if (rows.length === 0) {
    return [];
  }

  // A missing cell, or one without any text, yields "" instead of throwing.
  const cellText = (cell) =>
    (cell?.textContent || cell?.innerText || "").trim();

  const [headerRow, ...bodyRows] = rows;
  const propNames = Array.from(headerRow.cells ?? [], cellText);

  return bodyRows.map((row) => {
    const obj = {};
    propNames.forEach((name, column) => {
      // Rows may have fewer cells than the header; those columns become "".
      obj[name] = cellText(row.cells?.[column]);
    });
    return obj;
  });
}
| |
/**
 * Builds the display labels for a list of GPU records.
 *
 * Each label has the form `<Product Name> - <first comma-separated part of
 * Memory>`. Records that lack a string "Product Name" or "Memory" are skipped
 * so one incomplete row cannot abort the whole list.
 *
 * @param {Object<string, string>[]} gpus - Records keyed by column header.
 * @returns {string[]} One label per usable record, in input order.
 */
function formatGpu(gpus) {
  return gpus
    .filter(
      (g) =>
        typeof g?.["Product Name"] === "string" &&
        typeof g?.["Memory"] === "string"
    )
    .map((g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`);
}
| |
/**
 * Lookup table from GGUF quantization label to the numeric value used as
 * `bpw` when sizing a model (presumably average bits per weight — confirm
 * against the source of these figures).
 *
 * Frozen because it is a shared constant that nothing should mutate.
 */
const gguf_quants = Object.freeze({
  "IQ1_S": 1.56,
  "IQ2_XXS": 2.06,
  "IQ2_XS": 2.31,
  "IQ2_S": 2.5,
  "IQ2_M": 2.7,
  "IQ3_XXS": 3.06,
  "IQ3_XS": 3.3,
  "Q2_K": 3.35,
  "Q3_K_S": 3.5,
  "IQ3_S": 3.5,
  "IQ3_M": 3.7,
  "Q3_K_M": 3.91,
  "Q3_K_L": 4.27,
  "IQ4_XS": 4.25,
  "IQ4_NL": 4.5,
  "Q4_0": 4.55,
  "Q4_K_S": 4.58,
  "Q4_K_M": 4.85,
  "Q5_0": 5.54,
  "Q5_K_S": 5.54,
  "Q5_K_M": 5.69,
  "Q6_K": 6.59,
  "Q8_0": 8.5,
});
| |
/**
 * Loads a model's `config.json` from huggingface.co and attaches the model's
 * parameter count as `config.parameters`.
 *
 * The parameter count is taken from the first source that yields a finite
 * number:
 *   1. `model.safetensors.index.json` -> `metadata.total_size / 2`
 *   2. `pytorch_model.bin.index.json` -> `metadata.total_size / 2`
 *   3. the `data-props` JSON embedded in the model's web page, fetched through
 *      corsproxy.io (no Authorization header is sent on this request).
 *
 * @param {string} hf_model - Repository id, e.g. "org/model".
 * @param {string} hf_token - Access token; an empty value sends no
 *   Authorization header.
 * @returns {Promise<object>} The parsed config with an added numeric
 *   `parameters` property.
 * @throws {Error} When `config.json` cannot be fetched or no source provides
 *   a parameter count.
 */
async function modelConfig(hf_model, hf_token) {
  // Declared locally so the request options never leak as a global.
  const auth = hf_token
    ? { headers: { Authorization: `Bearer ${hf_token}` } }
    : {};
  const repo_url = `https://huggingface.co/${hf_model}`;

  // Reads `metadata.total_size` from a weight index file. Returns null on any
  // failure: this is a deliberate best-effort probe, the caller falls through
  // to the next source.
  const paramsFromIndex = async (file_name) => {
    try {
      const response = await fetch(`${repo_url}/resolve/main/${file_name}`, auth);
      if (!response.ok) {
        return null;
      }
      const index = await response.json();
      // total_size is halved — presumably it is a byte count and weights are
      // assumed to be 2 bytes each; TODO confirm.
      const params = index?.metadata?.total_size / 2;
      return Number.isFinite(params) ? params : null;
    } catch {
      return null;
    }
  };

  // Last resort: scrape the parameter total from the model page markup.
  const paramsFromModelPage = async () => {
    const response = await fetch(
      `https://corsproxy.io/?${encodeURIComponent(repo_url)}`
    );
    const page = await response.text();
    // DOMParser yields an inert document, so markup from the fetched page is
    // never attached to, or executed in, the live document.
    const doc = new DOMParser().parseFromString(page, "text/html");

    const readProps = (selector) => {
      const el = doc.querySelector(selector);
      return el === null ? null : JSON.parse(el.getAttribute("data-props"));
    };
    const candidates = [
      readProps('div[data-target="ModelSafetensorsParams"]')?.safetensors?.total,
      readProps('div[data-target="ModelHeader"]')?.model?.safetensors?.total,
    ];
    const total = candidates.find((value) => Number.isFinite(value));
    if (total === undefined) {
      throw new Error(`could not determine the parameter count of "${hf_model}"`);
    }
    return total;
  };

  const config_response = await fetch(`${repo_url}/raw/main/config.json`, auth);
  if (!config_response.ok) {
    throw new Error(
      `could not load config.json for "${hf_model}" (HTTP ${config_response.status}); ` +
        "check the model name, or supply a token for gated/private repos"
    );
  }
  const config = await config_response.json();

  config.parameters =
    (await paramsFromIndex("model.safetensors.index.json")) ??
    (await paramsFromIndex("pytorch_model.bin.index.json")) ??
    (await paramsFromModelPage());
  return config;
}
| |
/**
 * Returns the sum of six input-buffer terms derived from the batch size, the
 * model's hidden size and the context length.
 *
 * NOTE(review): the term names mirror what look like llama.cpp input tensors;
 * the unit of the result is not established in this function — confirm
 * before relying on it as a byte count.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; only `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} `3 * bsz + hidden_size * bsz + context * bsz + context`.
 */
function inputBuffer(context = 8192, model_config, bsz = 512) {
  const inp_tokens = bsz;
  const inp_embd = model_config["hidden_size"] * bsz;
  const inp_pos = bsz;
  const inp_KQ_mask = context * bsz;
  const inp_K_shift = context;
  const inp_sum = bsz;

  return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum;
}
| |
/**
 * Estimates the compute buffer from the context length and the number of
 * attention heads.
 *
 * The formula is `(context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024`.
 * It does not depend on `bsz`; when a batch size other than 512 is passed the
 * user is warned via `alert` and the same formula is used anyway.
 *
 * NOTE(review): the constants 2 and 0.75 look empirically fitted — confirm
 * their origin.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; only `num_attention_heads` is read.
 * @param {number} [bsz=512] - Batch size (only used to decide whether to warn).
 * @returns {number} The estimated compute buffer size.
 */
function computeBuffer(context = 8192, model_config, bsz = 512) {
  if (bsz !== 512) {
    alert(
      "batch size other than 512 is currently not supported for the compute buffer, using batch size 512 for the compute buffer calculation; the end result will be an overestimation"
    );
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
| |
/**
 * Estimates the size of the key/value cache.
 *
 * Result: `2 * kv_width * num_hidden_layers * context * (cache_bit / 8)`,
 * where the factor 2 covers keys and values and `kv_width` is
 *   - `head_dim * num_key_value_heads` when the config states `head_dim`, or
 *   - `hidden_size / (num_attention_heads / num_key_value_heads)` otherwise.
 *
 * A config without `num_key_value_heads` (or with it set to null) is treated
 * as having one KV head per attention head, rather than producing NaN.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Reads `num_attention_heads`,
 *   `num_key_value_heads`, `head_dim`, `hidden_size`, `num_hidden_layers`.
 * @param {number} [cache_bit=16] - Bits per cached element.
 * @returns {number} Estimated cache size (bytes, given bits / 8 per element).
 */
function kvCache(context = 8192, model_config, cache_bit = 16) {
  const n_head = model_config["num_attention_heads"];
  const n_head_kv = model_config["num_key_value_heads"] ?? n_head;
  const head_dim = model_config["head_dim"];

  // Prefer an explicit head_dim: for some models it is not
  // hidden_size / num_attention_heads.
  const n_embd_gqa = Number.isFinite(head_dim)
    ? head_dim * n_head_kv
    : model_config["hidden_size"] / (n_head / n_head_kv);

  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
| |
/**
 * Adds up the context-dependent memory terms: input buffer, KV cache and
 * compute buffer, rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config forwarded to the three helpers.
 * @param {number} [bsz=512] - Batch size, forwarded to `inputBuffer` and
 *   `computeBuffer`.
 * @param {number} [cache_bit=16] - Bits per cached element, forwarded to
 *   `kvCache`.
 * @returns {number} `inputBuffer + kvCache + computeBuffer`, rounded via
 *   `toFixed(2)`.
 */
function contextSize(context = 8192, model_config, bsz = 512, cache_bit = 16) {
  const total =
    inputBuffer(context, model_config, bsz) +
    kvCache(context, model_config, cache_bit) +
    computeBuffer(context, model_config, bsz);
  return Number.parseFloat(total.toFixed(2));
}
| |
/**
 * Estimates the size of the quantized model weights.
 *
 * @param {object} model_config - Model config; only `parameters` is read.
 * @param {number} [bpw=4.5] - Value multiplied with the parameter count and
 *   divided by 8 (presumably bits per weight, giving bytes).
 * @returns {number} `parameters * bpw / 8`, rounded via `toFixed(2)`.
 */
function modelSize(model_config, bpw = 4.5) {
  const size = model_config["parameters"] * bpw / 8;
  return Number.parseFloat(size.toFixed(2));
}
| |
/**
 * Reads the form, estimates model / context / total memory and writes the
 * results (in units of 2**30 bytes, two decimals) into the result elements.
 *
 * When the GPU field ends in a memory size (e.g. "GeForce RTX 3090 - 24 GB"),
 * the total is highlighted: green when more than 0.5 is left over, yellow
 * when it just fits, red when it does not fit. Without a parseable memory
 * size any previous highlight is cleared.
 *
 * Any error is reported to the user through `alert`; nothing is rethrown.
 *
 * @param {string} format - "gguf" or "exl2"; other values size the model with
 *   a bpw of 0 and the default batch size / cache width.
 * @returns {Promise<void>}
 */
async function calculateSizes(format) {
  // Extracts the memory size from a GPU label, in units of 2**30 bytes.
  // Only the text after the last "-" is inspected, so product names that
  // contain hyphens (e.g. "Max-Q") do not confuse the parse. Returns NaN when
  // that text is not "<number> [MB|GB|TB]".
  const parseVram = (label) => {
    const segments = label.split("-");
    const match = /^\s*(\d+(?:\.\d+)?)\s*(MB|GB|TB)?\s*$/i.exec(
      segments[segments.length - 1]
    );
    if (match === null) {
      return Number.NaN;
    }
    const unit_factor = { MB: 1 / 1024, GB: 1, TB: 1024 };
    return Number.parseFloat(match[1]) * unit_factor[(match[2] ?? "GB").toUpperCase()];
  };

  try {
    const model_config = await modelConfig(
      document.getElementById("modelsearch").value,
      document.getElementById("hf_token").value
    );
    const context = Number.parseInt(document.getElementById("contextsize").value, 10);
    if (!Number.isFinite(context)) {
      throw new Error("context size must be a number");
    }

    let bsz = 512;
    let cache_bit = 16;
    let bpw = 0;
    if (format === "gguf") {
      bsz = Number.parseInt(document.getElementById("batchsize").value, 10);
      // The label comes from rendered button text; trim surrounding whitespace
      // and refuse unknown labels instead of silently sizing with a default.
      const quant = document.getElementById("quantsize").innerText.trim();
      bpw = gguf_quants[quant];
      if (typeof bpw !== "number") {
        throw new Error(`unknown GGUF quantization "${quant}"`);
      }
    } else if (format === "exl2") {
      cache_bit = Number.parseInt(document.getElementById("kvCache").value, 10);
      bpw = Number.parseFloat(document.getElementById("bpw").value);
    }

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / 2 ** 30;
    document.getElementById("resultmodel").innerText = (model_size / 2 ** 30).toFixed(2);
    document.getElementById("resultcontext").innerText = (context_size / 2 ** 30).toFixed(2);
    const result_total_el = document.getElementById("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    const vram = parseVram(document.getElementById("gpusearch").value);
    if (Number.isNaN(vram)) {
      // No usable GPU selected: drop any highlight left from an earlier run.
      result_total_el.style.backgroundColor = "";
    } else if (vram - total_size > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (vram - total_size > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }
  } catch (e) {
    alert(e);
  }
}
| </script> |
| <link href="./styles.css" rel="stylesheet"> |
| <title>Can I run it? - LLM VRAM Calculator</title> |
| </head> |
| <body class="p-8"> |
| <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10"> |
| <h1 class="text-xl font-semibold leading-6 text-gray-900"> |
| LLM Model, Can I run it? |
| </h1> |
| <p> |
| To support gated or private repos, you need to <a href="https://huggingface.co/settings/tokens" style="color: #4444ff"><b>create an authentication token</b></a>, check the box <span style="color: #6e1818"><b>"Read access to contents of all public gated repos you can access"</b></span>, and then enter the token in the field below. |
| </p> |
| |
| <div class="flex flex-col gap-10"> |
| <div class="w-auto flex flex-col gap-4"> |
| |
| <div |
| class="relative" |
| x-data="{ |
| results: null, |
| query: null |
| }" |
| > |
| <label |
| for="hf_token" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| >Huggingface Token (optional)</label |
| > |
| <input |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| id="hf_token" |
| /> |
| </div> |
| |
| <div |
| class="relative" |
| x-data="{ |
| results: null, |
| query: null |
| }" |
| > |
| <label |
| for="gpusearch" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| >GPU (optional)</label |
| > |
| <input |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| placeholder="GeForce RTX 3090 - 24 GB" |
| id="gpusearch" |
| name="gpusearch" |
| list="gpulist" |
| x-model="query" |
| @keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))" |
| /> |
| <datalist id="gpulist"> |
| <template x-for="item in results"> |
| <option :value="item" x-text="item"></option> |
| </template> |
| </datalist> |
| </div> |
| |
|
|
|
|
| <div class="flex flex-row gap-4 relative"> |
| <label |
| for="modelsearch" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Model (unquantized) |
| </label> |
| <div |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| x-data="{ |
| open: false, |
| value: 'Nexusflow/Starling-LM-7B-beta', |
| results: null, |
| toggle() { |
| if (this.open) { |
| return this.close() |
| } |
| |
| this.$refs.input.focus() |
| |
| this.open = true |
| }, |
| close(focusAfter) { |
| if (! this.open) return |
| |
| this.open = false |
| |
| focusAfter && focusAfter.focus() |
| } |
| }" |
| x-on:keydown.escape.prevent.stop="close($refs.input)" |
| x-id="['model-typeahead']" |
| class="relative" |
| > |
| |
| <input |
| id="modelsearch" |
| x-ref="input" |
| x-on:click="toggle()" |
| @keypress.debounce.150ms="results = (await |
| fetch('https://huggingface.co/api/quicksearch?type=model&q=' + |
| encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));" |
| :aria-expanded="open" |
| :aria-controls="$id('model-typeahead')" |
| x-model="value" |
| class="flex justify-between items-center gap-2 w-full" |
| /> |
|
|
| |
| <div |
| x-ref="panel" |
| x-show="open" |
| x-transition.origin.top.left |
| x-on:click.outside="close($refs.input)" |
| :id="$id('model-typeahead')" |
| style="display: none" |
| class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
| > |
| <template x-for="result in results"> |
| <a |
| @click="value = result.id; close($refs.input)" |
| x-text="result.id" |
| class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
| ></a> |
| </template> |
| </div> |
| </div> |
| </div> |
|
|
|
|
| |
| <div class="relative"> |
| <label |
| for="contextsize" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Context Size |
| </label> |
| <input |
| value="8192" |
| type="number" |
| name="contextsize" |
| id="contextsize" |
| step="1024" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| /> |
| </div> |
| |
| <div class="relative"> |
| <label |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| >Quant Format</label |
| > |
| <fieldset |
| x-model="format" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| > |
| <legend class="sr-only">Quant format</legend> |
| <div |
| class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0" |
| > |
| <div class="flex items-center"> |
| <input |
| id="gguf-format" |
| name="quant-format" |
| type="radio" |
| value="gguf" |
| checked |
| class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| /> |
| <label |
| for="gguf-format" |
| class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| >GGUF</label |
| > |
| </div> |
| <div class="flex items-center"> |
| <input |
| id="exl2-format" |
| name="quant-format" |
| type="radio" |
| value="exl2" |
| class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| /> |
| <label |
| for="exl2-format" |
| class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| >EXL2</label |
| > |
| </div> |
| <div class="flex items-center"> |
| <input |
| id="gptq-format" |
| name="quant-format" |
| type="radio" |
| disabled |
| value="gptq" |
| class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" |
| /> |
| <label |
| for="gptq-format" |
| class="ml-3 block text-sm font-medium leading-6 text-gray-900" |
| >GPTQ (coming soon)</label |
| > |
| </div> |
| </div> |
| </fieldset> |
| </div> |
| |
| <div x-show="format === 'exl2'" class="flex flex-row gap-4"> |
| <div class="relative flex-grow"> |
| <label |
| for="bpw" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| BPW |
| </label> |
| <input |
| value="4.5" |
| type="number" |
| step="0.01" |
| id="bpw" |
| name="bpw" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| /> |
| </div> |
| <div |
| class="flex-shrink relative rounded-md" |
| > |
| <div |
| class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| > |
| <label |
| for="kvCache" |
| class="inline-block bg-white text-xs font-medium text-gray-900" |
| > |
| KV Cache |
| </label> |
| <select id="kvCache" name="kvCache"> |
| <option value="16">16 bit</option> |
| <option value="8">8 bit</option> |
| <option value="4">4 bit</option> |
| </select> |
| </div> |
| </div> |
| </div> |
| |
| <div x-show="format === 'gguf'" class="relative"> |
| <div class="flex flex-row gap-4"> |
| <label |
| for="quantsize" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Quantization Size |
| </label> |
| <div |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| x-data="{ |
| open: false, |
| value: '', |
| toggle() { |
| if (this.open) { |
| return this.close() |
| } |
| |
| this.$refs.button.focus() |
| |
| this.open = true |
| }, |
| close(focusAfter) { |
| if (! this.open) return |
| |
| this.open = false |
| |
| focusAfter && focusAfter.focus() |
| } |
| }" |
| x-on:keydown.escape.prevent.stop="close($refs.button)" |
| x-id="['dropdown-button']" |
| class="relative" |
| > |
| |
| <button |
| x-ref="button" |
| x-on:click="toggle()" |
| :aria-expanded="open" |
| :aria-controls="$id('dropdown-button')" |
| type="button" |
| id="quantsize" |
| x-text="value.length === 0 ? 'Q4_K_S' : value" |
| class="flex justify-between items-center gap-2 w-full" |
| > |
| Q4_K_S |
|
|
| |
| <svg |
| xmlns="http://www.w3.org/2000/svg" |
| class="h-5 w-5 text-gray-400" |
| viewBox="0 0 20 20" |
| fill="currentColor" |
| > |
| <path |
| fill-rule="evenodd" |
| d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" |
| clip-rule="evenodd" |
| /> |
| </svg> |
| </button> |
|
|
| |
| <div |
| x-data="{ quants: [ |
| 'IQ1_S', |
| 'IQ2_XXS', |
| 'IQ2_XS', |
| 'IQ2_S', |
| 'IQ2_M', |
| 'IQ3_XXS', |
| 'IQ3_XS', |
| 'Q2_K', |
| 'Q3_K_S', |
| 'IQ3_S', |
| 'IQ3_M', |
| 'Q3_K_M', |
| 'Q3_K_L', |
| 'IQ4_XS', |
| 'IQ4_NL', |
| 'Q4_0', |
| 'Q4_K_S', |
| 'Q4_K_M', |
| 'Q5_0', |
| 'Q5_K_S', |
| 'Q5_K_M', |
| 'Q6_K', |
| 'Q8_0' |
| ]}" |
| x-ref="panel" |
| x-show="open" |
| x-transition.origin.top.left |
| x-on:click.outside="close($refs.button)" |
| :id="$id('dropdown-button')" |
| style="display: none" |
| class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10" |
| > |
| <template x-for="quant in quants"> |
| <a |
| @click="value = quant; close($refs.button)" |
| x-text="quant" |
| class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500" |
| ></a> |
| </template> |
| </div> |
| </div> |
| <div class="relative"> |
| <label |
| for="batchsize" |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Batch Size |
| </label> |
| <input |
| value="512" |
| type="number" |
| step="128" |
| id="batchsize" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| /> |
| </div> |
| </div> |
| </div> |
| <button |
| type="button" |
| class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600" |
| @click="calculateSizes(format)" |
| > |
| Submit |
| </button> |
| </div> |
| <div class="w-auto flex flex-col gap-4"> |
| <div class="relative"> |
| <label |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Model Size (GB) |
| </label> |
| <div |
| id="resultmodel" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| >4.20</div> |
| </div> |
| <div class="relative"> |
| <label |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Context Size (GB) |
| </label> |
| <div |
| id="resultcontext" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| >6.90</div> |
| </div> |
| <div class="relative"> |
| <label |
| class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900" |
| > |
| Total Size (GB) |
| </label> |
| <div |
| id="resulttotal" |
| class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" |
| >420.69</div> |
| </div> |
| </div> |
| </div> |
| </div> |
| <script |
| src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" |
| ></script> |
| <script defer> |
| calculateSizes("gguf") |
| </script> |
| </body> |
| </html> |
|
|