|
|
<!DOCTYPE html> |
|
|
<html lang="en"> |
|
|
<head>
  <!-- Document metadata: encoding, responsive viewport, tab title and the external stylesheet. -->
  <meta charset="UTF-8">
  <meta content="width=device-width, initial-scale=1.0" name="viewport">
  <title>RF-DETR WebGPU</title>
  <link href="style.css" rel="stylesheet">
</head>
|
|
<body> |
|
|
|
|
|
<!-- Page heading and two-line tagline. -->
<h1>RF-DETR WebGPU</h1>
<div class="subtitle">
  Real-Time Detection Transformers<br>
  running 100% locally in your browser.
</div>
|
|
|
|
|
<!-- Video stage: status overlay, FPS readout, source/pause controls, the video element and the detection canvas. -->
<div class="container">
  <!-- Progress/error overlay. The script rewrites #status-text and #status-sub, so it is exposed
       as a polite live region; the spinner is purely decorative. -->
  <div id="status" role="status">
    <div class="spinner" aria-hidden="true"></div>
    <div id="status-content">
      <div id="status-text">Initializing...</div>
      <div id="status-sub">Please allow camera access</div>
    </div>
  </div>

  <!-- Updated by the script on every processed frame. -->
  <div id="fps">FPS: 0.0</div>

  <!-- Each button has visible text, so its icon is hidden from assistive technology.
       type="button" keeps the buttons inert if this markup is ever placed inside a form. -->
  <div id="source-toggle">
    <button type="button" id="source-webcam" class="source-btn active" title="Webcam">
      <svg aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/><circle cx="12" cy="13" r="4"/></svg>
      Webcam
    </button>
    <button type="button" id="source-file" class="source-btn" title="Video File">
      <svg aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="23 7 16 12 23 17 23 7"/><rect x="1" y="5" width="15" height="14" rx="2" ry="2"/></svg>
      File
    </button>
    <button type="button" id="pause-btn" class="source-btn" title="Pause">
      <!-- The script toggles these two icons through their inline display style. -->
      <svg id="pause-icon" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="6" y="4" width="4" height="16"/><rect x="14" y="4" width="4" height="16"/></svg>
      <svg id="play-icon" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="display:none"><polygon points="5 3 19 12 5 21 5 3"/></svg>
      <span id="pause-label">Pause</span>
    </button>
  </div>

  <!-- Hidden picker opened programmatically by the File button. -->
  <input type="file" id="file-input" accept="video/*" hidden>
  <video id="webcam" autoplay playsinline muted></video>
  <canvas id="overlay"></canvas>
</div>
|
|
|
|
|
<!-- Detection controls: score threshold slider and an optional comma-separated label filter. -->
<div class="controls">
  <label class="control-label">
    <span>Threshold</span>
    <input id="threshold" type="range" min="0" max="1" step="0.01" value="0.5">
    <span id="thresh-val">0.50</span>
  </label>

  <div class="control-divider"></div>

  <label class="control-label">
    <span>Labels (COCO subset)</span>
    <input id="allowed-labels" type="text" placeholder="e.g. person, car">
  </label>
</div>
|
|
|
|
|
<!-- Attribution. rel="noopener" is stated explicitly so the new tab never receives a window.opener
     reference, regardless of browser defaults. -->
<footer>
  Powered by <a href="https://github.com/huggingface/transformers.js" target="_blank" rel="noopener">Transformers.js v4</a>
</footer>
|
|
|
|
|
<script type="module"> |
|
|
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@next'; |
|
|
|
|
|
// Look up once every DOM node the script reads or mutates.
const byId = (id) => document.getElementById(id);

// Video element and the canvas that detection boxes are drawn on.
const video = byId('webcam');
const overlay = byId('overlay');

// Status overlay pieces (title line, detail line, spinner).
const statusOverlay = byId('status');
const statusText = byId('status-text');
const statusSub = byId('status-sub');
const spinner = document.querySelector('.spinner');

// FPS readout and the user-adjustable detection controls.
const fpsElem = byId('fps');
const slider = byId('threshold');
const sliderVal = byId('thresh-val');
const allowedLabelsInput = byId('allowed-labels');

// Source selection: webcam button, file button and the hidden file picker.
const btnWebcam = byId('source-webcam');
const btnFile = byId('source-file');
const fileInput = byId('file-input');

// Pause/play button with its two icons and text label.
const pauseBtn = byId('pause-btn');
const pauseIcon = byId('pause-icon');
const playIcon = byId('play-icon');
const pauseLabel = byId('pause-label');
|
|
|
|
|
// ---- Mutable runtime state ----
let detector;                       // detection pipeline; assigned once the model has loaded
let lastTime = performance.now();   // timestamp of the previous loop iteration (FPS readout)
let threshold = 0.5;                // score threshold passed to the detector; driven by the slider
let allowedLabels = null;           // Set of lowercase labels to keep, or null for no filtering
let paused = false;                 // true while the user has paused detection
let webcamStream = null;            // cached getUserMedia stream, or null when none is held

// ---- Canvases ----
// Off-screen canvas that receives each video frame before it is handed to the detector.
const inputCanvas = document.createElement('canvas');
const inputCtx = inputCanvas.getContext('2d', { willReadFrequently: true });
// 2D context of the visible overlay canvas.
const overlayCtx = overlay.getContext('2d');

// ---- Box colors ----
// Palette cycled through as new labels appear; the map remembers each label's assigned color.
const COLORS = [
  '#3b82f6',
  '#ef4444',
  '#10b981',
  '#f59e0b',
  '#8b5cf6',
  '#ec4899',
];
const labelColorMap = new Map();
let nextColorIndex = 0;
|
|
|
|
|
/**
 * Returns the color associated with a label.
 *
 * The first time a label is seen it is assigned the next palette entry
 * (wrapping around COLORS); later calls return that same color.
 *
 * @param {string} label Detection label.
 * @returns {string} CSS color string taken from COLORS.
 */
function getColorForLabel(label) {
  if (labelColorMap.has(label)) {
    return labelColorMap.get(label);
  }

  const assigned = COLORS[nextColorIndex % COLORS.length];
  nextColorIndex += 1;
  labelColorMap.set(label, assigned);
  return assigned;
}
|
|
// Video track constraints passed to getUserMedia: rear-facing camera preferred, 640x480 as the ideal size.
const VIDEO_CONSTRAINTS = {
  facingMode: 'environment',
  width: { ideal: 640 },
  height: { ideal: 480 },
};

// Rectangle (in CSS pixels of the video element) that detection boxes are mapped into.
// Recomputed by resizeOverlay().
let videoRect = {
  x: 0,
  y: 0,
  w: 0,
  h: 0,
};
|
|
|
|
|
// Keep the threshold variable and its numeric readout in sync with the slider.
slider.addEventListener('input', ({ target }) => {
  threshold = parseFloat(target.value);
  sliderVal.textContent = threshold.toFixed(2);
});

// Parse the comma-separated label filter. A blank field disables filtering (null);
// otherwise the entries are trimmed, lower-cased and empty ones dropped.
allowedLabelsInput.addEventListener('input', ({ target }) => {
  const raw = target.value.trim();
  if (!raw) {
    allowedLabels = null;
    return;
  }

  const names = raw
    .split(',')
    .map((part) => part.trim().toLowerCase())
    .filter(Boolean);
  allowedLabels = new Set(names);
});
|
|
|
|
|
|
|
|
/**
 * Re-sizes both canvases to the current video and recomputes videoRect.
 *
 * - The overlay's backing store is sized to the video element's client box
 *   multiplied by the device pixel ratio.
 * - The input canvas is sized to the video's intrinsic resolution.
 * - videoRect becomes the centered, aspect-preserving rectangle of the video
 *   inside the element's client box (presumably matching how CSS fits the
 *   video; the stylesheet is not visible here).
 */
function resizeOverlay() {
  const { clientWidth: cw, clientHeight: ch, videoWidth, videoHeight } = video;
  const dpr = window.devicePixelRatio || 1;

  overlay.width = cw * dpr;
  overlay.height = ch * dpr;
  overlayCtx.scale(dpr, dpr);

  inputCanvas.width = videoWidth;
  inputCanvas.height = videoHeight;

  // Fall back to the element's box while the intrinsic size is still 0.
  const vw = videoWidth || cw;
  const vh = videoHeight || ch;
  const videoAR = vw / vh;
  const containerAR = cw / ch;

  let drawW;
  let drawH;
  if (videoAR > containerAR) {
    // Video is relatively wider than the box: full width, reduced height.
    drawW = cw;
    drawH = cw / videoAR;
  } else {
    // Video is relatively taller (or equal): full height, reduced width.
    drawW = ch * videoAR;
    drawH = ch;
  }

  videoRect = {
    x: (cw - drawW) / 2,
    y: (ch - drawH) / 2,
    w: drawW,
    h: drawH,
  };
}

window.addEventListener('resize', resizeOverlay);
|
|
|
|
|
|
|
|
/**
 * Waits until the video element has metadata for its current source, then
 * starts playback and re-sizes the canvases.
 *
 * Uses one-shot event listeners rather than assigning `video.onloadedmetadata`,
 * so overlapping calls no longer overwrite each other's handler.
 *
 * @returns {Promise<void>} Resolves once metadata is available and the overlay
 *   has been resized. Rejects if the element reports a load error (for example
 *   an undecodable file); previously the promise never settled in that case.
 */
async function onVideoReady() {
  if (video.readyState < HTMLMediaElement.HAVE_METADATA) {
    await new Promise((resolve, reject) => {
      const detach = () => {
        video.removeEventListener('loadedmetadata', handleLoaded);
        video.removeEventListener('error', handleError);
      };
      const handleLoaded = () => {
        detach();
        resolve();
      };
      const handleError = () => {
        detach();
        // video.error is a MediaError, not an Error; wrap it so callers get a normal exception.
        reject(new Error(video.error?.message || 'Failed to load video source'));
      };
      video.addEventListener('loadedmetadata', handleLoaded);
      video.addEventListener('error', handleError);
    });
  }

  // play() returns a promise that rejects when playback is blocked or interrupted
  // by a new load; report it instead of leaving an unhandled rejection.
  video.play()?.catch((err) => console.warn('Video playback did not start:', err));
  resizeOverlay();
}
|
|
|
|
|
/**
 * Leaves the paused state: restores the pause icon and label, resets the FPS
 * timestamp and schedules the detection loop. Does nothing when not paused.
 */
function resume() {
  if (paused) {
    paused = false;

    // Show the "Pause" affordance again.
    playIcon.style.display = 'none';
    pauseIcon.style.display = '';
    pauseLabel.textContent = 'Pause';

    // Restart timing from now so the paused interval is not counted as one frame.
    lastTime = performance.now();
    requestAnimationFrame(loop);
  }
}
|
|
|
|
|
|
|
|
/**
 * Switches the video element to the webcam.
 *
 * The camera stream is acquired BEFORE the current file source is torn down,
 * so a denied or failed camera request leaves file playback untouched.
 * A cached stream is reused only while it is still active; a stream whose
 * tracks have all ended is replaced by a fresh getUserMedia request.
 *
 * Errors are logged to the console and not rethrown (unchanged behavior).
 *
 * @returns {Promise<void>}
 */
async function switchToWebcam() {
  try {
    if (!webcamStream?.active) {
      webcamStream = await navigator.mediaDevices.getUserMedia({ video: VIDEO_CONSTRAINTS, audio: false });
    }

    // Camera is available: release the object URL of any file that was playing.
    if (video.src) {
      URL.revokeObjectURL(video.src);
      video.removeAttribute('src');
    }
    video.loop = false;

    video.srcObject = webcamStream;
    await onVideoReady();
    btnWebcam.classList.add('active');
    btnFile.classList.remove('active');
    resume();
  } catch (e) {
    console.error('Webcam error:', e);
  }
}
|
|
|
|
|
/**
 * Switches the video element to a user-selected video file, played muted and looping.
 *
 * Stops and drops any held webcam stream first. The object URL of a previously
 * selected file is revoked once the new one is assigned; before, switching
 * from one file to another leaked the old blob URL.
 *
 * @param {File} file Video file chosen by the user.
 * @returns {Promise<void>}
 */
async function switchToFile(file) {
  if (webcamStream) {
    webcamStream.getTracks().forEach(t => t.stop());
    webcamStream = null;
  }

  const previousUrl = video.src; // '' when no src attribute is set
  video.srcObject = null;
  video.src = URL.createObjectURL(file);
  if (previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  video.loop = true;
  video.muted = true;
  await onVideoReady();
  btnFile.classList.add('active');
  btnWebcam.classList.remove('active');
  resume();
}
|
|
|
|
|
// Source selection wiring.
btnWebcam.addEventListener('click', switchToWebcam);

// The File button only opens the hidden picker; the actual switch happens on 'change'.
btnFile.addEventListener('click', () => {
  fileInput.click();
});

fileInput.addEventListener('change', (event) => {
  const picked = event.target.files[0];
  if (picked) {
    switchToFile(picked);
  }
  // Clear the selection so choosing the same file again still fires 'change'.
  fileInput.value = '';
});
|
|
|
|
|
// Pause/play toggle: pausing flips the flag, swaps the icons and label and pauses the video;
// un-pausing goes through resume() and restarts playback.
pauseBtn.addEventListener('click', () => {
  if (!paused) {
    paused = true;
    pauseIcon.style.display = 'none';
    playIcon.style.display = '';
    pauseLabel.textContent = 'Play';
    video.pause();
    return;
  }

  resume();
  video.play();
});
|
|
|
|
|
/**
 * Shows an error in the status overlay and hides the spinner.
 *
 * @param {string} title Text for the main status line.
 * @param {string} message Text for the secondary status line.
 */
function showError(title, message) {
  spinner.style.display = 'none';
  statusText.textContent = title;
  statusSub.textContent = message;
}
|
|
|
|
|
|
|
|
// Start with the webcam as the initial source. If the call rejects, show the
// error in the status overlay and abort the rest of the module.
// NOTE(review): switchToWebcam() logs and swallows errors raised inside its own
// try block, so this handler may rarely (if ever) run - confirm intent.
await switchToWebcam().catch((e) => {
  showError("Camera Error", e.message);
  throw e;
});
|
|
|
|
|
|
|
|
// ---- Model download ----
statusText.textContent = "Loading Model...";
statusSub.textContent = "Downloading RF-DETR Medium (fp32)";

try {
  const pipelineOptions = {
    device: 'webgpu',
    dtype: 'fp32',
  };
  detector = await pipeline('object-detection', 'onnx-community/rfdetr_medium-ONNX', pipelineOptions);

  // ---- Warm-up ----
  // Run one inference on the current frame before the loop starts; the UI
  // presents this phase as shader compilation.
  statusText.textContent = "Compiling Shaders...";
  statusSub.textContent = "This may take a moment";

  const { width: frameW, height: frameH } = inputCanvas;
  inputCtx.drawImage(video, 0, 0, frameW, frameH);
  await detector(inputCanvas, { threshold: 0.5, percentage: true });

  // Fade the status overlay out, then remove it from layout after 300 ms.
  statusOverlay.style.opacity = '0';
  setTimeout(() => {
    statusOverlay.style.display = 'none';
  }, 300);
} catch (e) {
  showError("Model Error", e.message);
  throw e;
}
|
|
|
|
|
|
|
|
/**
 * One iteration of the detection loop: update the FPS readout, copy the
 * current video frame to the input canvas, run the detector, apply the
 * optional label filter, draw the results and schedule the next iteration.
 *
 * Guards added on top of the paused check:
 * - returns immediately if the detector has not been created yet (resume()
 *   can schedule this function while the model is still loading);
 * - returns immediately if a previous iteration is still awaiting the
 *   detector, so pausing and resuming during an in-flight inference cannot
 *   start a second concurrent loop. The in-flight iteration schedules the
 *   next frame itself.
 *
 * The in-flight flag is stored on the function object because function
 * declarations are hoisted: it is safe to read even if this function is
 * invoked before module evaluation has reached this point.
 *
 * @returns {Promise<void>}
 */
async function loop() {
  if (paused || !detector || loop.busy) return;

  loop.busy = true;
  try {
    const now = performance.now();
    const dt = now - lastTime;
    lastTime = now;

    if (dt > 0) {
      fpsElem.textContent = `FPS: ${(1000 / dt).toFixed(1)}`;
    }

    inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);

    let results = await detector(inputCanvas, { threshold, percentage: true });
    if (allowedLabels) {
      results = results.filter(r => allowedLabels.has(r.label.toLowerCase()));
    }
    drawResults(results);
  } finally {
    // Always release the guard, even if inference throws, so a later resume can restart the loop.
    loop.busy = false;
  }

  requestAnimationFrame(loop);
}
|
|
|
|
|
/**
 * Clears the overlay and draws one rounded box plus a "label NN%" tag per detection.
 *
 * Box coordinates are treated as fractions of the frame (the detector is
 * called with `percentage: true`) and mapped into videoRect.
 *
 * @param {Array<{box: {xmin: number, ymin: number, xmax: number, ymax: number}, label: string, score: number}>} results
 */
function drawResults(results) {
  const { x: originX, y: originY, w: rectW, h: rectH } = videoRect;
  const dpr = window.devicePixelRatio || 1;

  // Clear in raw device pixels, then draw in CSS pixels scaled by the device pixel ratio.
  overlayCtx.setTransform(1, 0, 0, 1, 0, 0);
  overlayCtx.clearRect(0, 0, overlay.width, overlay.height);
  overlayCtx.setTransform(dpr, 0, 0, dpr, 0, 0);

  overlayCtx.font = '600 13px system-ui';
  overlayCtx.lineWidth = 2.5;

  for (const { box, label, score } of results) {
    const color = getColorForLabel(label);

    const left = originX + box.xmin * rectW;
    const top = originY + box.ymin * rectH;
    const boxW = (box.xmax - box.xmin) * rectW;
    const boxH = (box.ymax - box.ymin) * rectH;

    // Bounding box outline.
    overlayCtx.strokeStyle = color;
    overlayCtx.beginPath();
    overlayCtx.roundRect(left, top, boxW, boxH, 6);
    overlayCtx.stroke();

    // Filled tag just above the box's top-left corner, sized to the caption.
    const caption = `${label} ${(score * 100).toFixed(0)}%`;
    const captionWidth = overlayCtx.measureText(caption).width;

    overlayCtx.fillStyle = color;
    overlayCtx.beginPath();
    overlayCtx.roundRect(left, top - 26, captionWidth + 12, 22, 4);
    overlayCtx.fill();

    // Caption text on top of the tag.
    overlayCtx.fillStyle = 'white';
    overlayCtx.fillText(caption, left + 6, top - 9);
  }
}

// Kick off the detection loop once the model is loaded and warmed up.
requestAnimationFrame(loop);
|
|
|
|
|
</script> |
|
|
</body> |
|
|
</html> |