Spaces:
Running
Running
| <html> | |
| <head> | |
| <meta charset="UTF-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
| <style> | |
| @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap"); | |
| html, | |
| body { | |
| font-family: "Source Sans 3", sans-serif; | |
| } | |
| </style> | |
| <title>Candle Blip Image Captioning Demo</title> | |
| <script src="https://cdn.tailwindcss.com/3.4.3"></script> | |
| <script type="module" src="./code.js"></script> | |
| <script type="module"> | |
// Repository that hosts both quantized GGUF checkpoints.
const CANDLE_BLIP_BASE_URL =
  "https://huggingface.co/lmz/candle-blip/resolve/main/";

// Config and tokenizer file names are the same for every model entry.
const SHARED_MODEL_FILES = {
  config: "config.json",
  tokenizer: "tokenizer.json",
};

/**
 * Catalogue of selectable models, keyed by the id shown in the model
 * dropdown. Each entry gives the base URL, the weight/config/tokenizer file
 * names under it, whether the weights are quantized, and a display size.
 */
const MODELS = {
  blip_image_quantized_q4k: {
    base_url: CANDLE_BLIP_BASE_URL,
    model: "blip-image-captioning-large-q4k.gguf",
    ...SHARED_MODEL_FILES,
    quantized: true,
    size: "271 MB",
  },
  blip_image_quantized_q80: {
    base_url: CANDLE_BLIP_BASE_URL,
    model: "blip-image-captioning-large-q80.gguf",
    ...SHARED_MODEL_FILES,
    quantized: true,
    size: "505 MB",
  },
  blip_image_large: {
    base_url:
      "https://huggingface.co/Salesforce/blip-image-captioning-large/resolve/refs%2Fpr%2F18/",
    model: "model.safetensors",
    ...SHARED_MODEL_FILES,
    quantized: false,
    size: "1.88 GB",
  },
};
// Module worker that receives captioning requests (see getImageCaption).
const blipWorker = new Worker("./blipWorker.js", { type: "module" });

// Page elements used throughout the script, looked up once by id.
const outputStatusEl = document.getElementById("output-status");
const outputCaptionEl = document.getElementById("output-caption");
const modelSelectEl = document.getElementById("model");
const clearBtn = document.getElementById("clear-btn");
const fileUpload = document.getElementById("file-upload");
const dropArea = document.getElementById("drop-area");
const imagesExamples = document.getElementById("image-select");
const canvas = document.getElementById("canvas");
const ctxCanvas = canvas.getContext("2d");

// Mutable UI state: whether a caption request is in flight, and the URL of
// the image currently shown on the canvas (null when nothing is loaded).
let isCaptioning = false;
let currentImageURL = null;
// The clear button resets the canvas and the output panel.
clearBtn.addEventListener("click", () => clearImageCanvas());

// Switching models re-captions the image that is already loaded, if any.
modelSelectEl.addEventListener("change", () => {
  if (!currentImageURL) {
    return;
  }
  runInference(currentImageURL);
});
// Caption an image picked through the file input.
fileUpload.addEventListener("input", async (e) => {
  const target = e.target;
  const file = target.files.length > 0 ? target.files[0] : null;
  // Reset the input so that choosing the same file again (e.g. after the
  // canvas was cleared) still fires an "input" event.
  target.value = "";
  // Ignore the upload while a caption is being generated, matching the
  // example-image handler; otherwise a second request would be posted to the
  // shared worker while the first is still running.
  if (!file || isCaptioning) {
    return;
  }
  const href = URL.createObjectURL(file);
  clearImageCanvas();
  await drawImageCanvas(href);
  runInference(href);
});
// Drag-and-drop support for the drop area.

// Highlight the border while something is dragged over the area.
dropArea.addEventListener("dragenter", (e) => {
  e.preventDefault();
  dropArea.classList.add("border-blue-700");
});
dropArea.addEventListener("dragleave", (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");
});
// Cancelling "dragover" is what allows the element to receive "drop".
dropArea.addEventListener("dragover", (e) => {
  e.preventDefault();
});
// Caption either a dropped file or a dropped image URL.
dropArea.addEventListener("drop", async (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");
  // Ignore the drop while a caption is being generated, matching the
  // example-image handler; otherwise a second request would be posted to the
  // shared worker while the first is still running.
  if (isCaptioning) {
    return;
  }
  const url = e.dataTransfer.getData("text/uri-list");
  const files = e.dataTransfer.files;
  // A dropped file takes precedence over a dropped URL.
  let href = null;
  if (files.length > 0) {
    href = URL.createObjectURL(files[0]);
  } else if (url) {
    href = url;
  }
  if (!href) {
    return;
  }
  clearImageCanvas();
  await drawImageCanvas(href);
  runInference(href);
});
// Clicking one of the example thumbnails loads and captions that image.
imagesExamples.addEventListener("click", async (event) => {
  // Do nothing while a caption is already being generated.
  if (isCaptioning) {
    return;
  }
  const clicked = event.target;
  // The listener sits on the container, so only react to clicks on images.
  if (clicked.nodeName !== "IMG") {
    return;
  }
  const exampleURL = clicked.src;
  clearImageCanvas();
  await drawImageCanvas(exampleURL);
  runInference(exampleURL);
});
/**
 * Reset the page to its "no image selected" state: wipe the canvas, forget
 * the current image, clear the captioning flag, disable the clear button and
 * show the default status message in place of any caption.
 */
function clearImageCanvas() {
  // Script state.
  isCaptioning = false;
  currentImageURL = null;

  // Canvas and its container.
  const { width, height } = canvas;
  ctxCanvas.clearRect(0, 0, width, height);
  canvas.parentElement.style.height = "auto";
  clearBtn.disabled = true;

  // Output panel: status visible, caption hidden.
  outputCaptionEl.hidden = true;
  outputStatusEl.hidden = false;
  outputStatusEl.innerText = "Please select an image";
}
/**
 * Load an image and draw it on the preview canvas at the image's own size.
 *
 * While loading, `currentImageURL` is set to `imgURL`. On success the canvas
 * is resized to the image, the container height is matched to the rendered
 * canvas and the clear button is enabled.
 *
 * @param {string} imgURL - URL of the image to display.
 * @returns {Promise<HTMLImageElement>} Resolves with the loaded image.
 * @throws {Error} Rejects when no URL is given or the image fails to load.
 */
async function drawImageCanvas(imgURL) {
  if (!imgURL) {
    throw new Error("No image URL provided");
  }
  return new Promise((resolve, reject) => {
    ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);
    const img = new Image();
    img.crossOrigin = "anonymous";
    img.onload = () => {
      canvas.width = img.width;
      canvas.height = img.height;
      ctxCanvas.drawImage(img, 0, 0);
      canvas.parentElement.style.height = canvas.offsetHeight + "px";
      clearBtn.disabled = false;
      resolve(img);
    };
    // Without this handler a broken or blocked URL would leave the promise
    // pending forever and the caller's `await` would never return.
    img.onerror = () => {
      // Forget the URL so a later model change does not retry a bad image.
      if (currentImageURL === imgURL) {
        currentImageURL = null;
      }
      reject(new Error(`Failed to load image: ${imgURL}`));
    };
    currentImageURL = imgURL;
    img.src = imgURL;
  });
}
// Once the document is parsed, add one <option> per entry in MODELS,
// labelled "<id> (<size>)".
document.addEventListener("DOMContentLoaded", () => {
  Object.keys(MODELS).forEach((modelId) => {
    const entry = document.createElement("option");
    entry.value = modelId;
    entry.innerText = `${modelId} (${MODELS[modelId].size})`;
    modelSelectEl.appendChild(entry);
  });
});
/**
 * Ask the worker to caption an image and wait for its answer.
 *
 * A single request object is posted to `worker`. The returned promise then
 * settles on the first of:
 *  - a message whose data contains an `error` key  -> rejects with that text,
 *  - a message whose data has `status === "complete"` -> resolves with the data,
 *  - an `error` event on the worker itself -> rejects.
 * Every message received before settling (including the settling one) is
 * also forwarded to `updateStatus` when it is provided.
 *
 * @param {Worker} worker - Worker that performs the captioning.
 * @param {string} weightsURL - URL of the model weights.
 * @param {string} tokenizerURL - URL of the tokenizer file.
 * @param {string} configURL - URL of the model config file.
 * @param {string} modelID - Identifier of the selected model.
 * @param {string} imageURL - URL of the image to caption.
 * @param {boolean} quantized - Whether the weights are quantized.
 * @param {?function(Object): void} [updateStatus=null] - Progress callback.
 * @returns {Promise<Object>} The data of the "complete" message.
 */
async function getImageCaption(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  imageURL,
  quantized,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    // Detach both listeners so a settled request ignores later events.
    function detach() {
      worker.removeEventListener("message", messageHandler);
      worker.removeEventListener("error", errorHandler);
    }
    function messageHandler(event) {
      const data = event.data;
      // `in` throws on primitives, so make sure the payload is an object.
      const isObject = data !== null && typeof data === "object";
      if (isObject && "error" in data) {
        detach();
        reject(new Error(data.error));
      } else if (isObject && data.status === "complete") {
        detach();
        resolve(data);
      }
      if (updateStatus) updateStatus(data);
    }
    // An uncaught failure inside the worker arrives as an "error" event, not
    // as a message; without this the promise would stay pending forever.
    function errorHandler(event) {
      detach();
      reject(new Error(event.message || "Worker error"));
    }
    // Listen before posting so no reply can be missed.
    worker.addEventListener("message", messageHandler);
    worker.addEventListener("error", errorHandler);
    worker.postMessage({
      weightsURL,
      tokenizerURL,
      configURL,
      modelID,
      imageURL,
      quantized,
    });
  });
}
/**
 * Progress callback for worker messages: shows the message text in the
 * status element when the message's status is "status"; other messages are
 * ignored.
 *
 * @param {Object} data - Payload of a worker message.
 */
function updateStatus(data) {
  const isProgressMessage = data.status === "status";
  if (!isProgressMessage) {
    return;
  }
  outputStatusEl.innerText = data.message;
}
/**
 * Caption `imageURL` with the model currently selected in the dropdown and
 * show the result (or the error message) in the output panel.
 *
 * The clear button and model dropdown are disabled and `isCaptioning` is set
 * for the duration of the request; all three are restored when it finishes,
 * whether it succeeded or failed.
 *
 * @param {string} imageURL - URL of the image to caption.
 * @returns {Promise<void>}
 */
async function runInference(imageURL) {
  // A request is already running: do nothing rather than show a misleading
  // "select an image" prompt.
  if (isCaptioning) {
    return;
  }
  if (!imageURL) {
    alert("Please select an image first");
    return;
  }
  outputStatusEl.hidden = false;
  outputCaptionEl.hidden = true;
  clearBtn.disabled = true;
  modelSelectEl.disabled = true;
  isCaptioning = true;
  try {
    // Looked up inside the try block so that an unknown selection is
    // reported like any other failure instead of leaving the UI locked.
    const selectedModel = modelSelectEl.value;
    const model = MODELS[selectedModel];
    if (!model) {
      throw new Error(`Unknown model: ${selectedModel}`);
    }
    const weightsURL = `${model.base_url}${model.model}`;
    const tokenizerURL = `${model.base_url}${model.tokenizer}`;
    const configURL = `${model.base_url}${model.config}`;
    const quantized = model.quantized;

    const time = performance.now();
    const caption = await getImageCaption(
      blipWorker,
      weightsURL,
      tokenizerURL,
      configURL,
      selectedModel,
      imageURL,
      quantized,
      updateStatus
    );
    const totalTime = ((performance.now() - time) / 1000).toFixed(2);

    // Build the output with DOM nodes so the generated caption is always
    // treated as text, never parsed as HTML.
    const timingEl = document.createElement("span");
    timingEl.className = "text-xs";
    timingEl.textContent = `Inference time: ${totalTime} s`;
    outputCaptionEl.textContent = caption.output;
    outputCaptionEl.append(document.createElement("br"), timingEl);

    outputStatusEl.hidden = true;
    outputCaptionEl.hidden = false;
  } catch (err) {
    console.error(err);
    outputStatusEl.hidden = false;
    outputCaptionEl.hidden = true;
    outputStatusEl.innerText = err.message;
  } finally {
    clearBtn.disabled = false;
    modelSelectEl.disabled = false;
    isCaptioning = false;
  }
}
| </script> | |
| </head> | |
| <body class="container max-w-4xl mx-auto p-4"> | |
| <main class="grid grid-cols-1 gap-5 relative"> | |
| <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span> | |
| <div> | |
| <h1 class="text-5xl font-bold">Candle BLIP Image Captioning</h1> | |
| <h2 class="text-2xl font-bold">Rust/WASM Demo</h2> | |
| <p class="max-w-lg"> | |
| <a | |
| href="https://huggingface.co/Salesforce/blip-image-captioning-large" | |
| target="_blank" | |
| class="underline hover:text-blue-500 hover:no-underline" | |
| >BLIP Image Captioning | |
| </a> | |
| running in the browser using | |
| <a | |
| href="https://github.com/huggingface/candle/" | |
| target="_blank" | |
| class="underline hover:text-blue-500 hover:no-underline" | |
| >Candle</a | |
| >, a minimalist ML framework for Rust. | |
| </p> | |
| <p class="text-xs max-w-lg py-2"> | |
| <b>Note:</b> | |
| Image captioning with the smallest model takes about 50 seconds; this | |
| will vary depending on your machine and model size. | |
| </p> | |
| </div> | |
| <div> | |
| <label for="model" class="font-medium block">Model Options: </label> | |
| <select | |
| id="model" | |
| class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max" | |
| ></select> | |
| </div> | |
| <!-- drag and drop area --> | |
| <div class="grid gap-4 sm:grid-cols-2 py-4"> | |
| <div class="relative max-w-lg"> | |
| <div | |
| class="absolute w-full bottom-full flex justify-between items-center" | |
| > | |
| <div class="flex gap-2 w-full"> | |
| <button | |
| id="clear-btn" | |
| disabled | |
| title="Clear Image" | |
| class="ml-auto text-xs bg-white rounded-md disabled:opacity-50 flex gap-1 items-center" | |
| > | |
| <svg | |
| class="" | |
| xmlns="http://www.w3.org/2000/svg" | |
| viewBox="0 0 13 12" | |
| height="1em" | |
| > | |
| <path | |
| d="M1.6.7 12 11.1M12 .7 1.6 11.1" | |
| stroke="#2E3036" | |
| stroke-width="2" | |
| /> | |
| </svg> | |
| </button> | |
| </div> | |
| </div> | |
| <div | |
| id="drop-area" | |
| class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative aspect-video w-full overflow-hidden" | |
| > | |
| <div | |
| class="flex flex-col items-center justify-center space-y-1 text-center" | |
| > | |
| <svg | |
| width="25" | |
| height="25" | |
| viewBox="0 0 25 25" | |
| fill="none" | |
| xmlns="http://www.w3.org/2000/svg" | |
| > | |
| <path | |
| d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z" | |
| fill="#000" | |
| /> | |
| </svg> | |
| <div class="flex text-sm text-gray-600"> | |
| <label | |
| for="file-upload" | |
| class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700" | |
| > | |
| <span>Drag and drop your image here</span> | |
| <span class="block text-xs">or</span> | |
| <span class="block text-xs">Click to upload</span> | |
| </label> | |
| </div> | |
| <input | |
| id="file-upload" | |
| name="file-upload" | |
| type="file" | |
| class="sr-only" | |
| /> | |
| </div> | |
| <canvas | |
| id="canvas" | |
| class="absolute pointer-events-none w-full" | |
| ></canvas> | |
| </div> | |
| </div> | |
| <div class=""> | |
| <div | |
| class="h-full bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2" | |
| > | |
| <p | |
| id="output-caption" | |
| class="m-auto text-xl text-center p-2" | |
| hidden | |
| ></p> | |
| <span id="output-status" class="m-auto font-light"> | |
| Please select an image | |
| </span> | |
| </div> | |
| </div> | |
| </div> | |
| <div> | |
| <div | |
| class="flex gap-3 items-center overflow-x-scroll" | |
| id="image-select" | |
| > | |
| <h3 class="font-medium">Examples:</h3> | |
| <img | |
| src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/sf.jpg" | |
| class="cursor-pointer w-24 h-24 object-cover" | |
| /> | |
| <img | |
| src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/bike.jpeg" | |
| class="cursor-pointer w-24 h-24 object-cover" | |
| /> | |
| <img | |
| src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/000000000077.jpg" | |
| class="cursor-pointer w-24 h-24 object-cover" | |
| /> | |
| </div> | |
| </div> | |
| </main> | |
| </body> | |
| </html> | |