import { AutoModel, AutoProcessor, RawImage } from "@huggingface/transformers";

// Reference the elements that we will need
const deviceLabel = document.getElementById("device");
const status = document.getElementById("status");
const container = document.getElementById("container");
const overlay = document.getElementById("overlay");
const canvas = document.getElementById("canvas");
const video = document.getElementById("video");
const thresholdSlider = document.getElementById("threshold");
const thresholdLabel = document.getElementById("threshold-value");
const sizeSlider = document.getElementById("size");
const sizeLabel = document.getElementById("size-value");
const scaleSlider = document.getElementById("scale");
const scaleLabel = document.getElementById("scale-value");

function setStreamSize(width, height) {
  video.width = canvas.width = Math.round(width);
  video.height = canvas.height = Math.round(height);

  // Make sure the overlay matches the canvas exactly
  overlay.style.width = `${canvas.width}px`;
  overlay.style.height = `${canvas.height}px`;
}

status.textContent = "Loading model...";

// Resolve the execution device and data type from the URL parameters,
// falling back to sensible defaults
function getDeviceConfig(deviceParam, dtypeParam) {
  const defaultDevice = 'webnn-gpu';
  const defaultDtype = 'fp16';
  const webnnDevices = ['webnn-gpu', 'webnn-cpu', 'webnn-npu'];
  const supportedDtypes = ['fp16', 'fp32', 'int8'];

  const device = (deviceParam || defaultDevice).toLowerCase();
  const dtype = (dtypeParam && supportedDtypes.includes(dtypeParam.toLowerCase()))
    ? dtypeParam.toLowerCase()
    : (webnnDevices.includes(device) ? defaultDtype : 'fp32');
  const sessionOptions = { logSeverityLevel: 0 };
  return { device, dtype, sessionOptions };
}

const urlParams = new URLSearchParams(window.location.search);
let { device, dtype, sessionOptions } = getDeviceConfig(
  urlParams.get('device'),
  urlParams.get('dtype'),
);

let deviceValue = 'WebNN GPU';
switch (device) {
  case 'webgpu':
    deviceValue = 'WebGPU';
    break;
  case 'webnn-gpu':
    deviceValue = 'WebNN GPU';
    break;
  case 'webnn-cpu':
    deviceValue = 'WebNN CPU';
    break;
  case 'webnn-npu':
    deviceValue = 'WebNN NPU';
    break;
  default:
    deviceValue = 'WebNN GPU';
}
deviceLabel.textContent = deviceValue;

if (!['webgpu', 'webnn-gpu', 'webnn-cpu', 'webnn-npu'].includes(device)) {
  status.textContent = `Unsupported device ${device}. Falling back to WebNN GPU.`;
  device = 'webnn-gpu';
}
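// Usage note: the backend is chosen via URL query parameters, e.g.
// ?device=webnn-npu&dtype=fp16 appended to the page URL.
//   device: webgpu | webnn-gpu (default) | webnn-cpu | webnn-npu
//   dtype:  fp16 (default on WebNN devices) | fp32 | int8
//           (non-WebNN devices fall back to fp32; see getDeviceConfig above)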
// Load the model and processor
const model_id = "webnn/yolo12n";
let model, processor;
try {
  status.textContent = "Loading model...";
  model = await AutoModel.from_pretrained(model_id, {
    device: device,
    dtype: dtype,
    session_options: sessionOptions,
  });
  processor = await AutoProcessor.from_pretrained(model_id);

  // Configure the processor to match the model's expected input size (640x640)
  processor.feature_extractor.size = { width: 640, height: 640 };

  status.textContent = "Model loaded successfully!";
} catch (err) {
  console.error(err);
  status.textContent = `Error: ${err.message}`;
  status.style.color = "red";
  // Stop execution
  throw err;
}

// Set up the controls
let scale = 1;
scaleSlider.addEventListener("input", () => {
  scale = Number(scaleSlider.value);
  setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
  scaleLabel.textContent = scale;
});
scaleSlider.disabled = false;

let threshold = 0.25;
thresholdSlider.addEventListener("input", () => {
  threshold = Number(thresholdSlider.value);
  thresholdLabel.textContent = threshold.toFixed(2);
});
thresholdSlider.disabled = false;

let size = 640;
sizeSlider.addEventListener("input", () => {
  size = Number(sizeSlider.value);
  processor.feature_extractor.size = { width: size, height: size };
  sizeLabel.textContent = size;
});
sizeSlider.disabled = false;

status.textContent = "Ready";

const COLOURS = [
  "#EF4444", "#4299E1", "#059669", "#FBBF24", "#4B52B1",
  "#7B3AC2", "#ED507A", "#1DD1A1", "#F3873A", "#4B5563",
  "#DC2626", "#1852B4", "#18A35D", "#F59E0B", "#4059BE",
  "#6027A5", "#D63D60", "#00AC9B", "#E64A19", "#272A34",
];

// Render a bounding box and label on the overlay
function renderBox(detection, canvasWidth, canvasHeight) {
  const { bbox, score, class: classId } = detection;
  if (score < threshold) return; // Skip boxes with low confidence

  const [x, y, width, height] = bbox;
  const color = COLOURS[classId % COLOURS.length];

  // Ensure coordinates are within bounds
  const clampedX = Math.max(0, Math.min(x, canvasWidth - width));
  const clampedY = Math.max(0, Math.min(y, canvasHeight - height));
  const clampedWidth = Math.max(1, Math.min(width, canvasWidth - clampedX));
  const clampedHeight = Math.max(1, Math.min(height, canvasHeight - clampedY));

  // Draw the box
  const boxElement = document.createElement("div");
  boxElement.className = "bounding-box";
  Object.assign(boxElement.style, {
    position: "absolute",
    left: `${clampedX}px`,
    top: `${clampedY}px`,
    width: `${clampedWidth}px`,
    height: `${clampedHeight}px`,
    border: `2px solid ${color}`,
    backgroundColor: "transparent",
    pointerEvents: "none",
    boxSizing: "border-box",
  });

  // Draw the label
  const labelElement = document.createElement("span");
  labelElement.textContent = `${model.config.id2label[classId]} (${(100 * score).toFixed(1)}%)`;
  labelElement.className = "bounding-box-label";
  Object.assign(labelElement.style, {
    backgroundColor: color,
    color: "white",
    padding: "2px 6px",
    fontSize: "12px",
    position: "absolute",
    top: "-22px",
    left: "0px",
    whiteSpace: "nowrap",
    borderRadius: "2px",
  });

  boxElement.appendChild(labelElement);
  overlay.appendChild(boxElement);
}

// Intersection over Union of two [x, y, w, h] boxes
function calculateIoU(boxA, boxB) {
  const [xA, yA, wA, hA] = boxA;
  const [xB, yB, wB, hB] = boxB;

  const x1 = Math.max(xA, xB);
  const y1 = Math.max(yA, yB);
  const x2 = Math.min(xA + wA, xB + wB);
  const y2 = Math.min(yA + hA, yB + hB);

  const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
  const areaA = wA * hA;
  const areaB = wB * hB;
  const union = areaA + areaB - intersection;
  return intersection / union;
}
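// Worked example: two 100x100 boxes offset by 50px horizontally overlap in a
// 50x100 region, so intersection = 5000 and union = 10000 + 10000 - 5000 = 15000,
// giving IoU = 5000 / 15000 ≈ 0.33. That is below the 0.45 threshold used in
// processDetections below, so neither box would be suppressed by NMS.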
function applyNMS(detections, iouThreshold = 0.5) {
  // Sort detections by confidence score in descending order
  detections.sort((a, b) => b.score - a.score);

  const filteredDetections = [];
  const used = new Array(detections.length).fill(false);

  for (let i = 0; i < detections.length; i++) {
    if (used[i]) continue;
    const detectionA = detections[i];
    filteredDetections.push(detectionA);

    for (let j = i + 1; j < detections.length; j++) {
      if (used[j]) continue;
      const detectionB = detections[j];

      // Only apply NMS to boxes of the same class
      if (detectionA.class === detectionB.class) {
        const iou = calculateIoU(detectionA.bbox, detectionB.bbox);
        if (iou > iouThreshold) {
          used[j] = true; // Suppress the overlapping box
        }
      }
    }
  }
  return filteredDetections;
}

function processDetections(outputs, canvasWidth, canvasHeight) {
  // Clear previous detections
  overlay.innerHTML = "";

  // Process the YOLOv12 outputs
  const predictions = outputs.tolist()[0]; // Get the first batch
  const numClasses = predictions.length - 4; // Subtract 4 for the bbox coordinates
  const numPredictions = predictions[0].length; // Number of predictions

  let detections = [];

  // Process each prediction
  for (let i = 0; i < numPredictions; i++) {
    const x = predictions[0][i]; // center x (0-640)
    const y = predictions[1][i]; // center y (0-640)
    const w = predictions[2][i]; // width (0-640)
    const h = predictions[3][i]; // height (0-640)

    // Pick the class with the highest score
    let maxScore = 0;
    let maxClassIndex = -1;
    for (let c = 0; c < numClasses; c++) {
      const score = predictions[c + 4][i];
      if (score > maxScore) {
        maxScore = score;
        maxClassIndex = c;
      }
    }
    if (maxScore < threshold) continue;

    // Convert from center coordinates to top-left coordinates,
    // scaling from the 640x640 model output to the canvas dimensions
    const scaleX = canvasWidth / 640;
    const scaleY = canvasHeight / 640;
    const centerX = x * scaleX;
    const centerY = y * scaleY;
    const boxWidth = w * scaleX;
    const boxHeight = h * scaleY;
    const xmin = centerX - boxWidth / 2;
    const ymin = centerY - boxHeight / 2;

    detections.push({
      bbox: [xmin, ymin, boxWidth, boxHeight],
      score: maxScore,
      class: maxClassIndex,
    });
  }

  // Apply Non-Maximum Suppression to remove duplicate detections
  // (a lower IoU threshold suppresses overlapping boxes more aggressively)
  const filteredDetections = applyNMS(detections, 0.45);

  // Debug: log detection info
  if (filteredDetections.length > 0) {
    console.log(
      `Found ${filteredDetections.length} detections:`,
      filteredDetections.map((d) => ({
        class: model.config.id2label[d.class],
        score: d.score.toFixed(3),
        bbox: d.bbox.map((v) => Math.round(v)),
      })),
    );
  }

  // Render the filtered detections
  filteredDetections.forEach((detection) => {
    renderBox(detection, canvasWidth, canvasHeight);
  });

  return filteredDetections.length;
}
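// Note on the render loop below: drawImage runs on every animation frame, but a
// new inference only starts once the previous one has finished (the isProcessing
// guard), so the video preview stays smooth even when the model runs slower
// than the display refresh rate.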
let isProcessing = false;
let previousTime;
const context = canvas.getContext("2d", { willReadFrequently: true });

function updateCanvas() {
  const { width, height } = canvas;
  context.drawImage(video, 0, 0, width, height);

  if (!isProcessing) {
    isProcessing = true;
    (async function () {
      try {
        // Read the current frame from the video
        const pixelData = context.getImageData(0, 0, width, height).data;
        const image = new RawImage(pixelData, width, height, 4);

        // Process the image and run the model
        const inputs = await processor(image);
        const { outputs } = await model(inputs);

        // Process detections and render the boxes
        const detectionCount = processDetections(outputs, width, height);

        if (previousTime !== undefined) {
          const fps = 1000 / (performance.now() - previousTime);
          status.textContent = `FPS: ${fps.toFixed(2)} | Detections: ${detectionCount}`;
        }
        previousTime = performance.now();
      } catch (error) {
        console.error("Detection error:", error);
        status.textContent = `Error: ${error.message}`;
      } finally {
        isProcessing = false;
      }
    })();
  }

  window.requestAnimationFrame(updateCanvas);
}

// Start the video stream
navigator.mediaDevices
  .getUserMedia({ video: true }) // Ask for video
  .then((stream) => {
    // Set up the video and canvas elements
    video.srcObject = stream;
    video.play();

    const videoTrack = stream.getVideoTracks()[0];
    const { width, height } = videoTrack.getSettings();
    setStreamSize(width * scale, height * scale);

    // Set the container width and height depending on the video aspect ratio
    const ar = width / height;
    const [cw, ch] = ar > 720 / 405 ? [720, 720 / ar] : [405 * ar, 405];
    container.style.width = `${cw}px`;
    container.style.height = `${ch}px`;

    // Start the animation loop
    window.requestAnimationFrame(updateCanvas);
  })
  .catch((error) => {
    alert(error);
  });
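// Note: getUserMedia requires a secure context (https:// or localhost) and user
// permission; if camera access is denied, the promise rejects and the alert
// above is shown.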