Spaces:
Running
Running
| import { AutoModel, AutoProcessor, RawImage } from "@huggingface/transformers"; | |
// Look up, once, every DOM node the demo reads from or writes to.
const byId = (id) => document.getElementById(id);

// Status read-outs.
const deviceLabel = byId("device");
const status = byId("status");

// Video pipeline: wrapper, box overlay, drawing canvas and camera <video>.
const container = byId("container");
const overlay = byId("overlay");
const canvas = byId("canvas");
const video = byId("video");

// Sliders and the labels that echo their current values.
const thresholdSlider = byId("threshold");
const thresholdLabel = byId("threshold-value");
const sizeSlider = byId("size");
const sizeLabel = byId("size-value");
const scaleSlider = byId("scale");
const scaleLabel = byId("scale-value");
/**
 * Resize the video element, the canvas and the overlay to one pixel size.
 *
 * @param {number} width - Requested width in pixels; rounded to an integer.
 * @param {number} height - Requested height in pixels; rounded to an integer.
 */
function setStreamSize(width, height) {
  const streamWidth = Math.round(width);
  const streamHeight = Math.round(height);

  canvas.width = streamWidth;
  video.width = streamWidth;
  canvas.height = streamHeight;
  video.height = streamHeight;

  // Size the overlay from the canvas itself so both always agree.
  overlay.style.width = `${canvas.width}px`;
  overlay.style.height = `${canvas.height}px`;
}
// Tell the user that start-up work is in progress.
status.textContent = "Loading model...";
/**
 * Build the backend configuration from the optional `device` / `dtype`
 * query-string values.
 *
 * The device string is returned lower-cased but otherwise unvalidated, so the
 * caller can still report an unsupported value. The dtype default, however,
 * is chosen for the device that will actually be used: an unsupported device
 * is treated as the default device, so it gets that device's default dtype
 * instead of silently switching to fp32.
 *
 * @param {?string} deviceParam - Requested device, or null/empty for the default.
 * @param {?string} dtypeParam - Requested dtype, or null/empty for the default.
 * @returns {{device: string, dtype: string, sessionOptions: {logSeverityLevel: number}}}
 */
function getDeviceConfig(deviceParam, dtypeParam) {
  const defaultDevice = "webnn-gpu";
  const defaultDtype = "fp16";
  const webnnDevices = ["webnn-gpu", "webnn-cpu", "webnn-npu"];
  const supportedDevices = ["webgpu", ...webnnDevices];
  const supportedDtypes = ["fp16", "fp32", "int8"];

  const device = (deviceParam || defaultDevice).toLowerCase();
  // The device the dtype default should be based on (see the note above).
  const effectiveDevice = supportedDevices.includes(device) ? device : defaultDevice;

  const requestedDtype = dtypeParam ? dtypeParam.toLowerCase() : "";
  const fallbackDtype = webnnDevices.includes(effectiveDevice) ? defaultDtype : "fp32";
  const dtype = supportedDtypes.includes(requestedDtype) ? requestedDtype : fallbackDtype;

  const sessionOptions = { logSeverityLevel: 0 };
  return { device, dtype, sessionOptions };
}
// Resolve the execution backend from the page URL (?device=...&dtype=...).
const urlParams = new URLSearchParams(window.location.search);
const requestedConfig = getDeviceConfig(urlParams.get("device"), urlParams.get("dtype"));
let { device } = requestedConfig;
const { dtype, sessionOptions } = requestedConfig;

// Single source of truth for the supported devices and their display names.
const DEVICE_LABELS = new Map([
  ["webgpu", "WebGPU"],
  ["webnn-gpu", "WebNN GPU"],
  ["webnn-cpu", "WebNN CPU"],
  ["webnn-npu", "WebNN NPU"],
]);

// Unknown devices are shown as (and replaced by) WebNN GPU.
const deviceValue = DEVICE_LABELS.get(device) ?? "WebNN GPU";
deviceLabel.textContent = deviceValue;

if (!DEVICE_LABELS.has(device)) {
  const fallbackNotice = `Unsupported device ${device}. Falling back to WebNN GPU.`;
  status.textContent = fallbackNotice;
  // The status line is reused for later progress messages, so also log the
  // notice where it stays visible.
  console.warn(fallbackNotice);
  device = "webnn-gpu";
}
// Load the detection model and its image processor.
const model_id = "webnn/yolo12n";
let model;
let processor;
try {
  status.textContent = "Loading model...";

  // The two loads do not depend on each other, so run them concurrently.
  [model, processor] = await Promise.all([
    AutoModel.from_pretrained(model_id, {
      device,
      dtype,
      session_options: sessionOptions,
    }),
    AutoProcessor.from_pretrained(model_id),
  ]);

  // Configure processor to match model's expected input size (640x640)
  processor.feature_extractor.size = { width: 640, height: 640 };
  status.textContent = "Model loaded successfully!";
} catch (err) {
  console.error(err);
  // Not every throwable is an Error; fall back to its string form rather
  // than showing "undefined".
  status.textContent = `Error: ${err?.message ?? String(err)}`;
  status.style.color = "red";
  // Rethrow so the rest of the module does not run without a model.
  throw err;
}
// Hook up the three sliders. Each handler updates its module-level setting
// and the label next to the slider; the sliders are then enabled.

// Display scale applied to the camera resolution.
let scale = 1;
function handleScaleInput() {
  scale = Number(scaleSlider.value);
  setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
  scaleLabel.textContent = scale;
}
scaleSlider.addEventListener("input", handleScaleInput);
scaleSlider.disabled = false;

// Minimum confidence a detection needs in order to be shown.
let threshold = 0.25;
function handleThresholdInput() {
  threshold = Number(thresholdSlider.value);
  thresholdLabel.textContent = threshold.toFixed(2);
}
thresholdSlider.addEventListener("input", handleThresholdInput);
thresholdSlider.disabled = false;

// Square size the processor resizes each frame to.
let size = 640;
function handleSizeInput() {
  size = Number(sizeSlider.value);
  processor.feature_extractor.size = { width: size, height: size };
  sizeLabel.textContent = size;
}
sizeSlider.addEventListener("input", handleSizeInput);
sizeSlider.disabled = false;

status.textContent = "Ready";
// Box colour palette; a class id picks its colour by index modulo the
// palette length.
const COLOURS = [
  "#EF4444", "#4299E1", "#059669", "#FBBF24", "#4B52B1",
  "#7B3AC2", "#ED507A", "#1DD1A1", "#F3873A", "#4B5563",
  "#DC2626", "#1852B4", "#18A35D", "#F59E0B", "#4059BE",
  "#6027A5", "#D63D60", "#00AC9B", "#E64A19", "#272A34",
];
/**
 * Render one detection as an absolutely positioned box with a label inside
 * the overlay element.
 *
 * The box is clipped to the canvas rectangle: edges that fall outside are
 * pulled in to the border, and edges already inside stay where they are.
 * (The previous clamping slid the whole box sideways instead.)
 *
 * @param {{bbox: number[], score: number, class: number}} detection -
 *   `bbox` is `[x, y, width, height]` in canvas pixels, top-left origin.
 * @param {number} canvasWidth - Width of the drawing area in pixels.
 * @param {number} canvasHeight - Height of the drawing area in pixels.
 */
function renderBox(detection, canvasWidth, canvasHeight) {
  const { bbox, score, class: classId } = detection;
  if (score < threshold) return; // Skip boxes with low confidence

  const [x, y, width, height] = bbox;
  const color = COLOURS[classId % COLOURS.length];

  // Clip each edge to the canvas independently.
  const clamp = (value, min, max) => Math.min(Math.max(value, min), max);
  const left = clamp(x, 0, canvasWidth);
  const top = clamp(y, 0, canvasHeight);
  const right = clamp(x + width, 0, canvasWidth);
  const bottom = clamp(y + height, 0, canvasHeight);
  // Keep at least one pixel so a degenerate box is still visible.
  const clippedWidth = Math.max(1, right - left);
  const clippedHeight = Math.max(1, bottom - top);

  // Draw the box
  const boxElement = document.createElement("div");
  boxElement.className = "bounding-box";
  Object.assign(boxElement.style, {
    position: "absolute",
    left: `${left}px`,
    top: `${top}px`,
    width: `${clippedWidth}px`,
    height: `${clippedHeight}px`,
    border: `2px solid ${color}`,
    backgroundColor: "transparent",
    pointerEvents: "none",
    boxSizing: "border-box",
  });

  // Draw label; fall back to the numeric id when the model config has no
  // name for this class.
  const className = model.config.id2label?.[classId] ?? `class ${classId}`;
  const labelElement = document.createElement("span");
  labelElement.textContent = `${className} (${(100 * score).toFixed(1)}%)`;
  labelElement.className = "bounding-box-label";
  Object.assign(labelElement.style, {
    backgroundColor: color,
    color: "white",
    padding: "2px 6px",
    fontSize: "12px",
    position: "absolute",
    top: "-22px",
    left: "0px",
    whiteSpace: "nowrap",
    borderRadius: "2px",
  });

  boxElement.appendChild(labelElement);
  overlay.appendChild(boxElement);
}
/**
 * Intersection-over-union of two axis-aligned boxes.
 *
 * @param {number[]} boxA - `[x, y, width, height]`, top-left origin.
 * @param {number[]} boxB - `[x, y, width, height]`, top-left origin.
 * @returns {number} Overlap ratio in [0, 1]; 0 when the union has no area
 *   (instead of the NaN that 0 / 0 would produce).
 */
function calculateIoU(boxA, boxB) {
  const [xA, yA, wA, hA] = boxA;
  const [xB, yB, wB, hB] = boxB;

  // Extent of the overlap rectangle; zero when the boxes do not intersect.
  const overlapWidth = Math.max(0, Math.min(xA + wA, xB + wB) - Math.max(xA, xB));
  const overlapHeight = Math.max(0, Math.min(yA + hA, yB + hB) - Math.max(yA, yB));
  const intersection = overlapWidth * overlapHeight;

  const union = wA * hA + wB * hB - intersection;
  return union > 0 ? intersection / union : 0;
}
/**
 * Greedy per-class non-maximum suppression.
 *
 * Detections are visited from highest to lowest score. One is kept unless an
 * already kept detection of the same class overlaps it with an IoU above
 * `iouThreshold`. The input array is not modified.
 *
 * @param {Array<{bbox: number[], score: number, class: number}>} detections
 * @param {number} [iouThreshold=0.5] - Overlap above which the lower-scored
 *   box is suppressed.
 * @returns {Array<{bbox: number[], score: number, class: number}>} Kept
 *   detections, ordered by descending score.
 */
function applyNMS(detections, iouThreshold = 0.5) {
  // Sort a copy so the caller's array keeps its order.
  const ranked = [...detections].sort((a, b) => b.score - a.score);

  const kept = [];
  for (const candidate of ranked) {
    // Only boxes of the same class compete with each other.
    const isSuppressed = kept.some(
      (winner) =>
        winner.class === candidate.class &&
        calculateIoU(winner.bbox, candidate.bbox) > iouThreshold,
    );
    if (!isSuppressed) {
      kept.push(candidate);
    }
  }
  return kept;
}
/**
 * Decode raw model output into detections, suppress duplicates and draw the
 * survivors into the overlay.
 *
 * `outputs.tolist()[0]` is read as a `[4 + numClasses][numPredictions]`
 * table: rows 0-3 are the box centre x, centre y, width and height, in pixels
 * of the square model input according to the original comments; each further
 * row holds one class's score per prediction.
 *
 * @param {{tolist: function(): Array}} outputs - Model output tensor.
 * @param {number} canvasWidth - Width of the drawing area in pixels.
 * @param {number} canvasHeight - Height of the drawing area in pixels.
 * @param {number} [inputSize=size] - Side length of the square model input
 *   the box coordinates refer to. Defaults to the current processor size, so
 *   boxes stay aligned when the size slider moves away from 640.
 * @returns {number} Number of detections left after suppression.
 */
function processDetections(outputs, canvasWidth, canvasHeight, inputSize = size) {
  // Clear previous detections
  overlay.innerHTML = "";

  const predictions = outputs.tolist()[0]; // Get the first batch
  const numClasses = predictions.length - 4; // Subtract 4 for bbox coordinates
  const numPredictions = predictions[0].length;

  // Model-input pixels -> canvas pixels; the same for every prediction.
  const scaleX = canvasWidth / inputSize;
  const scaleY = canvasHeight / inputSize;

  const detections = [];
  for (let i = 0; i < numPredictions; i++) {
    // Pick the best-scoring class for this prediction.
    let maxScore = 0;
    let maxClassIndex = -1;
    for (let c = 0; c < numClasses; c++) {
      const score = predictions[c + 4][i];
      if (score > maxScore) {
        maxScore = score;
        maxClassIndex = c;
      }
    }
    // No class scored above zero (index stays -1) or confidence too low.
    if (maxClassIndex < 0 || maxScore < threshold) continue;

    // Scale to the canvas and convert centre coordinates to top-left.
    const boxWidth = predictions[2][i] * scaleX;
    const boxHeight = predictions[3][i] * scaleY;
    const xmin = predictions[0][i] * scaleX - boxWidth / 2;
    const ymin = predictions[1][i] * scaleY - boxHeight / 2;

    detections.push({
      bbox: [xmin, ymin, boxWidth, boxHeight],
      score: maxScore,
      class: maxClassIndex,
    });
  }

  // Apply Non-Maximum Suppression to remove duplicate detections
  const filteredDetections = applyNMS(detections, 0.45);

  // Debug: Log detection info
  if (filteredDetections.length > 0) {
    console.log(
      `Found ${filteredDetections.length} detections:`,
      filteredDetections.map((d) => ({
        class: model.config.id2label[d.class],
        score: d.score.toFixed(3),
        bbox: d.bbox.map((v) => Math.round(v)),
      })),
    );
  }

  // Render filtered detections
  filteredDetections.forEach((detection) => {
    renderBox(detection, canvasWidth, canvasHeight);
  });
  return filteredDetections.length;
}
// 2D context used both to paint video frames and to read their pixels back.
const context = canvas.getContext("2d", { willReadFrequently: true });

// True while an inference is in flight, so frames arriving meanwhile are only
// drawn and not queued for detection.
let isProcessing = false;

// performance.now() timestamp of the last finished inference; undefined until
// the first one completes.
let previousTime;
/**
 * Run detection on the pixels currently on the canvas and update the status
 * line. Errors are logged and shown, never rethrown, and `isProcessing` is
 * always cleared so the next frame can be processed.
 *
 * @param {number} width - Canvas width in pixels (non-zero).
 * @param {number} height - Canvas height in pixels (non-zero).
 * @returns {Promise<void>}
 */
async function detectFrame(width, height) {
  try {
    // Read the current frame from the video
    const pixelData = context.getImageData(0, 0, width, height).data;
    const image = new RawImage(pixelData, width, height, 4);

    // Process the image and run the model
    const inputs = await processor(image);
    const { outputs } = await model(inputs);

    // Process detections and render boxes
    const detectionCount = processDetections(outputs, width, height);

    // The rate is measured between completed inferences.
    if (previousTime !== undefined) {
      const fps = 1000 / (performance.now() - previousTime);
      status.textContent = `FPS: ${fps.toFixed(2)} | Detections: ${detectionCount}`;
    }
    previousTime = performance.now();
  } catch (error) {
    console.error("Detection error:", error);
    status.textContent = `Error: ${error.message}`;
  } finally {
    isProcessing = false;
  }
}

/**
 * Animation-frame callback: paint the current video frame and, when no
 * inference is in flight, start one. It always reschedules itself.
 */
function updateCanvas() {
  const { width, height } = canvas;

  // A zero-sized canvas has no pixels to read; getImageData would throw on
  // every frame, so wait until the stream has a real size.
  if (width > 0 && height > 0) {
    context.drawImage(video, 0, 0, width, height);
    if (!isProcessing) {
      isProcessing = true;
      // Fire and forget: detectFrame handles its own errors.
      void detectFrame(width, height);
    }
  }

  window.requestAnimationFrame(updateCanvas);
}
// Start the video stream
try {
  const stream = await navigator.mediaDevices.getUserMedia({ video: true }); // Ask for video

  // Set up the video and canvas elements.
  video.srcObject = stream;
  // play() returns a promise; report a rejection instead of leaving it
  // unhandled. Start-up does not wait for playback to begin.
  video.play().catch((error) => {
    console.error("Video playback failed:", error);
  });

  const [videoTrack] = stream.getVideoTracks();
  const { width, height } = videoTrack.getSettings();
  setStreamSize(width * scale, height * scale);

  // Set container width and height depending on the image aspect ratio:
  // fit inside a 720x405 area without distorting the picture.
  const ar = width / height;
  const [cw, ch] = ar > 720 / 405 ? [720, 720 / ar] : [405 * ar, 405];
  container.style.width = `${cw}px`;
  container.style.height = `${ch}px`;

  // Start the animation loop
  window.requestAnimationFrame(updateCanvas);
} catch (error) {
  alert(error);
}