| <!DOCTYPE html> |
| <html lang="en"> |
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>RF-DETR WebGPU</title>
  <meta name="description" content="Real-time RF-DETR object detection running entirely in your browser with WebGPU and Transformers.js.">
  <!-- The inline module script imports from jsDelivr; module fetches use CORS mode, so the preconnect must opt into CORS as well. -->
  <link rel="preconnect" href="https://cdn.jsdelivr.net" crossorigin>
  <link rel="stylesheet" href="style.css">
</head>
| <body> |
|
|
| <h1>RF-DETR WebGPU</h1> |
| <div class="subtitle"> |
| Real-Time Detection Transformers<br> |
| running 100% locally in your browser. |
| </div> |
|
|
<div class="container">
  <!-- Loading / error overlay. role="status" makes text changes written by the script announce politely. -->
  <div id="status" role="status">
    <div class="spinner" aria-hidden="true"></div>
    <div id="status-content">
      <div id="status-text">Initializing...</div>
      <div id="status-sub">Please allow camera access</div>
    </div>
  </div>
  <div id="fps">FPS: 0.0</div>
  <!-- Source / playback controls. Every button carries visible text, so the icons are decorative and hidden from assistive tech. -->
  <div id="source-toggle">
    <button type="button" id="source-webcam" class="source-btn active" title="Webcam">
      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false"><path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/><circle cx="12" cy="13" r="4"/></svg>
      Webcam
    </button>
    <button type="button" id="source-file" class="source-btn" title="Video File">
      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false"><polygon points="23 7 16 12 23 17 23 7"/><rect x="1" y="5" width="15" height="14" rx="2" ry="2"/></svg>
      File
    </button>
    <!-- No title attribute here: the script flips the visible label between "Pause" and "Play", and a static tooltip would contradict it. -->
    <button type="button" id="pause-btn" class="source-btn">
      <svg id="pause-icon" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false"><rect x="6" y="4" width="4" height="16"/><rect x="14" y="4" width="4" height="16"/></svg>
      <!-- Kept as an inline display style because the script toggles style.display on both icons. -->
      <svg id="play-icon" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="display:none" aria-hidden="true" focusable="false"><polygon points="5 3 19 12 5 21 5 3"/></svg>
      <span id="pause-label">Pause</span>
    </button>
  </div>
  <!-- Hidden picker; opened programmatically by the File button. -->
  <input type="file" id="file-input" accept="video/*" hidden>
  <video id="webcam" autoplay playsinline muted></video>
  <canvas id="overlay"></canvas>
</div>
|
|
<div class="controls">
  <label class="control-label">
    <span>Threshold</span>
    <input type="range" id="threshold" min="0" max="1" step="0.01" value="0.5">
    <span id="thresh-val">0.50</span>
  </label>
  <div class="control-divider"></div>
  <label class="control-label">
    <span>Labels (COCO subset)</span>
    <!-- Class names are matched literally by the script, so keep browsers and mobile keyboards from "correcting" or capitalising them. -->
    <input type="text" id="allowed-labels" placeholder="e.g. person, car" autocomplete="off" autocapitalize="none" autocorrect="off" spellcheck="false">
  </label>
</div>
|
|
<footer>
  <!-- rel="noopener noreferrer": the new tab gets no window.opener handle back to this page. -->
  Powered by <a href="https://github.com/huggingface/transformers.js" target="_blank" rel="noopener noreferrer">Transformers.js v4</a>
</footer>
|
|
| <script type="module"> |
| import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@next'; |
| |
// Shorthand for the id-based element lookups below.
const byId = (id) => document.getElementById(id);

// --- Media surfaces ---
const video = byId('webcam');
const overlay = byId('overlay');

// --- Status overlay and FPS readout ---
const statusOverlay = byId('status');
const statusText = byId('status-text');
const statusSub = byId('status-sub');
const spinner = document.querySelector('.spinner');
const fpsElem = byId('fps');

// --- User controls ---
const slider = byId('threshold');
const sliderVal = byId('thresh-val');
const allowedLabelsInput = byId('allowed-labels');
const btnWebcam = byId('source-webcam');
const btnFile = byId('source-file');
const fileInput = byId('file-input');
const pauseBtn = byId('pause-btn');
const pauseIcon = byId('pause-icon');
const playIcon = byId('play-icon');
const pauseLabel = byId('pause-label');

// --- Mutable runtime state ---
let detector;                     // object-detection pipeline; assigned once the model has loaded
let lastTime = performance.now(); // timestamp of the previous loop iteration, used for the FPS readout
let threshold = 0.5;              // score threshold passed to the detector
let allowedLabels = null;         // Set of lower-cased labels to keep, or null for no filtering
let paused = false;               // true while the detection loop is halted
let webcamStream = null;          // active camera MediaStream, or null when none is held

// --- Canvases ---
// Off-DOM canvas that video frames are copied into before being handed to the detector.
const inputCanvas = document.createElement('canvas');
const inputCtx = inputCanvas.getContext('2d', { willReadFrequently: true });
const overlayCtx = overlay.getContext('2d');
| |
// Box colour palette; labels are assigned colours from it in first-seen order.
const COLORS = [
  '#3b82f6',
  '#ef4444',
  '#10b981',
  '#f59e0b',
  '#8b5cf6',
  '#ec4899',
];
const labelColorMap = new Map();
let nextColorIndex = 0;

/**
 * Return the colour associated with a label, assigning one on first use.
 *
 * The first time a label is seen it receives the next palette entry
 * (wrapping around when the palette is exhausted); later calls with the same
 * label return the same colour.
 *
 * @param {string} label - Class label of a detection.
 * @returns {string} CSS colour string from COLORS.
 */
function getColorForLabel(label) {
  let color = labelColorMap.get(label);
  if (color === undefined) {
    color = COLORS[nextColorIndex % COLORS.length];
    nextColorIndex += 1;
    labelColorMap.set(label, color);
  }
  return color;
}
// Constraints passed to getUserMedia: prefer the rear-facing camera at roughly 640x480.
const VIDEO_CONSTRAINTS = { facingMode: 'environment', width: { ideal: 640 }, height: { ideal: 480 } };

// Rectangle, in CSS pixels relative to the overlay, that the letterboxed video
// picture occupies. Recomputed by resizeOverlay() and read by drawResults().
let videoRect = { x: 0, y: 0, w: 0, h: 0 };

/** Copy the slider's current value into `threshold` and the numeric readout. */
function syncThreshold() {
  threshold = parseFloat(slider.value);
  sliderVal.textContent = threshold.toFixed(2);
}

/**
 * Parse the comma-separated label filter into a Set of lower-cased names,
 * or null when the field is empty (meaning "show every label").
 */
function syncAllowedLabels() {
  const val = allowedLabelsInput.value.trim();
  allowedLabels = val ? new Set(val.split(',').map(s => s.trim().toLowerCase()).filter(Boolean)) : null;
}

slider.addEventListener('input', syncThreshold);
allowedLabelsInput.addEventListener('input', syncAllowedLabels);

// Browsers may restore form-control values on reload or back/forward navigation
// without firing `input`. Without an explicit sync the slider position and the
// filter text could disagree with the values the detection loop actually uses.
syncThreshold();
syncAllowedLabels();
window.addEventListener('pageshow', () => {
  syncThreshold();
  syncAllowedLabels();
});
| |
| |
/**
 * Re-fit the drawing surfaces to the current element and video sizes.
 *
 * - Sizes the overlay's backing store to the video element's CSS box times
 *   the device pixel ratio.
 * - Sizes the off-DOM inference canvas to the video's intrinsic resolution.
 * - Recomputes `videoRect`, the centred, aspect-preserving rectangle the
 *   picture occupies inside the element (this assumes the stylesheet
 *   letterboxes the video, e.g. `object-fit: contain` — not visible here).
 */
function resizeOverlay() {
  const cw = video.clientWidth;
  const ch = video.clientHeight;
  const dpr = window.devicePixelRatio || 1;

  // Assigning width/height clears the canvas and resets its context state,
  // so the transform is set (not accumulated) right afterwards.
  overlay.width = cw * dpr;
  overlay.height = ch * dpr;
  overlayCtx.setTransform(dpr, 0, 0, dpr, 0, 0);

  // Only touch the inference canvas when the intrinsic size is known and has
  // actually changed: re-assigning the dimensions wipes the canvas, and a 0x0
  // canvas cannot be read back for inference.
  const intrinsicW = video.videoWidth;
  const intrinsicH = video.videoHeight;
  if (intrinsicW > 0 && intrinsicH > 0 &&
      (inputCanvas.width !== intrinsicW || inputCanvas.height !== intrinsicH)) {
    inputCanvas.width = intrinsicW;
    inputCanvas.height = intrinsicH;
  }

  // Nothing is laid out yet (e.g. element hidden): avoid 0/0 -> NaN below.
  if (cw === 0 || ch === 0) {
    videoRect = { x: 0, y: 0, w: 0, h: 0 };
    return;
  }

  // Fall back to the element box when the intrinsic size is not known yet.
  const vw = intrinsicW || cw;
  const vh = intrinsicH || ch;
  const videoAR = vw / vh;
  const containerAR = cw / ch;
  const widthLimited = videoAR > containerAR;
  const drawW = widthLimited ? cw : ch * videoAR;
  const drawH = widthLimited ? cw / videoAR : ch;

  videoRect = {
    x: (cw - drawW) / 2,
    y: (ch - drawH) / 2,
    w: drawW,
    h: drawH,
  };
}

window.addEventListener('resize', resizeOverlay);
// The video element fires `resize` when its intrinsic dimensions change
// (for example when a phone camera rotates), which also moves the letterbox.
video.addEventListener('resize', resizeOverlay);
| |
| |
/**
 * Wait until the <video> element has loaded metadata for its newly assigned
 * source, then start playback and re-fit the overlay.
 *
 * Call this right after assigning `video.src` / `video.srcObject`.
 *
 * @returns {Promise<void>} Resolves once metadata is available.
 * @throws {Error} If the element reports a load error (e.g. an unsupported
 *   file), instead of waiting forever for a `loadedmetadata` that never comes.
 */
async function onVideoReady() {
  await new Promise((resolve, reject) => {
    const detach = () => {
      video.removeEventListener('loadedmetadata', handleLoaded);
      video.removeEventListener('error', handleError);
    };
    const handleLoaded = () => {
      detach();
      resolve();
    };
    const handleError = () => {
      detach();
      reject(new Error(video.error?.message || 'Video failed to load'));
    };
    // addEventListener (rather than the on* property) so other handlers on
    // the element are not overwritten.
    video.addEventListener('loadedmetadata', handleLoaded);
    video.addEventListener('error', handleError);
  });

  // play() returns a promise that rejects when playback is blocked or
  // interrupted; log it rather than leaving an unhandled rejection.
  video.play()?.catch((e) => console.warn('Video playback did not start:', e));
  resizeOverlay();
}
| |
/**
 * Leave the paused state: restore the "Pause" button appearance, reset the
 * FPS clock and schedule the detection loop. Does nothing when not paused.
 */
function resume() {
  if (paused) {
    paused = false;

    // Button shows the pause glyph and label again.
    playIcon.style.display = 'none';
    pauseIcon.style.display = '';
    pauseLabel.textContent = 'Pause';

    // Restart timing from now so the time spent paused is not counted as one frame.
    lastTime = performance.now();
    requestAnimationFrame(loop);
  }
}
| |
| |
/**
 * Make the camera the active video source.
 *
 * The camera stream is acquired BEFORE any file source is torn down, so a
 * denied or failed camera request leaves the currently playing file intact.
 * A cached stream is reused only while it is still active; one whose tracks
 * have all ended is replaced by a fresh getUserMedia() request.
 *
 * Errors are logged to the console and not re-thrown.
 *
 * @returns {Promise<void>}
 */
async function switchToWebcam() {
  try {
    if (!webcamStream?.active) {
      webcamStream = await navigator.mediaDevices.getUserMedia({ video: VIDEO_CONSTRAINTS, audio: false });
    }

    // Camera is available: now it is safe to release the file source.
    if (video.src) {
      URL.revokeObjectURL(video.src);
      video.removeAttribute('src');
    }
    video.loop = false;

    video.srcObject = webcamStream;
    await onVideoReady();
    btnWebcam.classList.add('active');
    btnFile.classList.remove('active');
    resume();
  } catch (e) {
    console.error('Webcam error:', e);
  }
}
| |
/**
 * Make a user-selected video file the active source (looped and muted).
 *
 * Stops and releases the camera stream if one is held, and revokes the object
 * URL of any previously loaded file so repeated file picks do not leak blobs.
 * Load failures are logged to the console and not re-thrown.
 *
 * @param {File} file - Video file chosen through the file input.
 * @returns {Promise<void>}
 */
async function switchToFile(file) {
  if (webcamStream) {
    webcamStream.getTracks().forEach(t => t.stop());
    webcamStream = null;
  }

  // Object URL of the file being replaced, if any ('' when no src is set).
  const previousUrl = video.src;

  video.srcObject = null;
  video.src = URL.createObjectURL(file);
  video.loop = true;
  video.muted = true;

  if (previousUrl) {
    URL.revokeObjectURL(previousUrl);
  }

  try {
    await onVideoReady();
    btnFile.classList.add('active');
    btnWebcam.classList.remove('active');
    resume();
  } catch (e) {
    console.error('Video file error:', e);
  }
}
| |
// Source buttons: "Webcam" switches immediately, "File" opens the hidden picker.
btnWebcam.addEventListener('click', switchToWebcam);
btnFile.addEventListener('click', () => {
  fileInput.click();
});

// A file was picked (or the dialog was cancelled, leaving the list empty).
fileInput.addEventListener('change', (e) => {
  const [file] = e.target.files;
  if (file) {
    switchToFile(file);
  }
  // Clear the selection so picking the same file again still fires `change`.
  fileInput.value = '';
});
| |
// Pause / Play toggle: halts or restarts both video playback and the detection loop.
pauseBtn.addEventListener('click', () => {
  if (paused) {
    resume();
    // play() returns a promise that rejects if playback cannot start (no
    // source, interrupted by a source switch, ...); log instead of leaving
    // an unhandled rejection.
    video.play()?.catch((e) => console.warn('Playback could not resume:', e));
    return;
  }

  paused = true;
  pauseIcon.style.display = 'none';
  playIcon.style.display = '';
  pauseLabel.textContent = 'Play';
  video.pause();
});
| |
/**
 * Turn the status overlay into an error message.
 *
 * @param {string} title - Headline written to the status text element.
 * @param {string} message - Detail written to the status sub-text element.
 */
function showError(title, message) {
  // The spinner implies ongoing work; hide it once an error is shown.
  spinner.style.display = 'none';
  statusSub.textContent = message;
  statusText.textContent = title;
}
| |
| |
// ---------------------------------------------------------------------------
// Startup sequence: camera, then model download, then a warm-up inference.
// ---------------------------------------------------------------------------

try {
  await switchToWebcam();
} catch (e) {
  showError("Camera Error", e.message);
  throw e;
}

// switchToWebcam() logs and swallows camera failures instead of rejecting, so
// the catch above does not fire for a denied or missing camera. A missing
// stream is the signal that the camera could not be opened; tell the user
// instead of silently showing an empty picture.
const cameraUnavailable = !webcamStream;

statusText.textContent = "Loading Model...";
statusSub.textContent = cameraUnavailable
  ? "Downloading RF-DETR Medium (fp32) · camera unavailable, use File to load a video"
  : "Downloading RF-DETR Medium (fp32)";

try {
  // The pipeline is pinned to the WebGPU backend; fail early with a readable
  // message rather than a backend-initialisation error after the download.
  if (!navigator.gpu) {
    throw new Error("WebGPU is not available in this browser");
  }

  detector = await pipeline('object-detection', 'onnx-community/rfdetr_medium-ONNX', {
    device: 'webgpu',
    dtype: 'fp32',
  });

  statusText.textContent = "Compiling Shaders...";
  statusSub.textContent = "This may take a moment";

  // Warm-up: run one inference on the current frame before the loop starts,
  // so one-time setup cost is paid while the status overlay is still up.
  inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);
  await detector(inputCanvas, { threshold: 0.5, percentage: true });

  // Fade the overlay out, then remove it from layout after 300 ms
  // (presumably the stylesheet's opacity transition time — not visible here).
  statusOverlay.style.opacity = '0';
  setTimeout(() => statusOverlay.style.display = 'none', 300);
} catch (e) {
  showError("Model Error", e.message);
  throw e;
}
| |
| |
/**
 * One iteration of the detection loop: update the FPS readout, copy the
 * current video frame to the inference canvas, run the detector, apply the
 * label filter, draw the results and schedule the next iteration.
 *
 * At most one iteration runs at a time. resume() schedules this function on
 * its own, so without a guard a pause followed by a resume while an inference
 * is still awaiting would leave two self-rescheduling loops running. The
 * in-flight flag lives on the function object (not in a `let`) because the
 * function can be scheduled before module evaluation reaches this point.
 *
 * Errors thrown by the detector propagate and stop the loop.
 *
 * @returns {Promise<void>}
 */
async function loop() {
  // Not started when paused, before the model exists, or while a previous
  // iteration is still running.
  if (paused || !detector || loop.inFlight) return;

  loop.inFlight = true;
  try {
    const now = performance.now();
    const dt = now - lastTime;
    lastTime = now;

    if (dt > 0) {
      fpsElem.textContent = `FPS: ${(1000 / dt).toFixed(1)}`;
    }

    inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);

    let results = await detector(inputCanvas, { threshold, percentage: true });
    if (allowedLabels) {
      results = results.filter(r => allowedLabels.has(r.label.toLowerCase()));
    }
    drawResults(results);
  } finally {
    loop.inFlight = false;
  }

  // If the user paused during inference, resume() restarts the loop later.
  if (!paused) {
    requestAnimationFrame(loop);
  }
}
| |
/**
 * Redraw the overlay with one rounded box and one "label NN%" tag per result.
 *
 * Box coordinates are multiplied by the letterboxed picture size in
 * `videoRect`, i.e. they are treated as fractions of the frame (the detector
 * is called with `percentage: true`).
 *
 * The tag normally sits just above its box. When the box is so close to the
 * top of the overlay that the tag would be drawn off-canvas, the tag is
 * placed just inside the box instead so the label stays readable.
 *
 * @param {Array<{box: {xmin: number, ymin: number, xmax: number, ymax: number},
 *                label: string, score: number}>} results - Detections to draw.
 */
function drawResults(results) {
  const TAG_HEIGHT = 22;      // height of the label background
  const TAG_GAP = 4;          // gap between the tag and the box edge
  const TAG_PAD_X = 6;        // horizontal text padding inside the tag
  const TAG_BASELINE = 17;    // text baseline offset from the top of the tag

  const { x: vx, y: vy, w, h } = videoRect;
  const dpr = window.devicePixelRatio || 1;

  // Clear in raw device pixels, then draw in CSS pixels.
  overlayCtx.setTransform(1, 0, 0, 1, 0, 0);
  overlayCtx.clearRect(0, 0, overlay.width, overlay.height);
  overlayCtx.setTransform(dpr, 0, 0, dpr, 0, 0);

  overlayCtx.font = '600 13px system-ui';
  overlayCtx.lineWidth = 2.5;

  for (const { box, label, score } of results) {
    const color = getColorForLabel(label);

    const x1 = vx + box.xmin * w;
    const y1 = vy + box.ymin * h;
    const bw = (box.xmax - box.xmin) * w;
    const bh = (box.ymax - box.ymin) * h;

    // Bounding box outline.
    overlayCtx.strokeStyle = color;
    overlayCtx.beginPath();
    overlayCtx.roundRect(x1, y1, bw, bh, 6);
    overlayCtx.stroke();

    // Label tag: above the box when it fits on the canvas, otherwise inside.
    const text = `${label} ${(score * 100).toFixed(0)}%`;
    const textWidth = overlayCtx.measureText(text).width;
    const tagAbove = y1 - (TAG_HEIGHT + TAG_GAP);
    const tagTop = tagAbove >= 0 ? tagAbove : y1 + TAG_GAP;

    overlayCtx.fillStyle = color;
    overlayCtx.beginPath();
    overlayCtx.roundRect(x1, tagTop, textWidth + 2 * TAG_PAD_X, TAG_HEIGHT, 4);
    overlayCtx.fill();

    overlayCtx.fillStyle = 'white';
    overlayCtx.fillText(text, x1 + TAG_PAD_X, tagTop + TAG_BASELINE);
  }
}
| |
| requestAnimationFrame(loop); |
| |
| </script> |
| </body> |
| </html> |