Xenova HF Staff commited on
Commit
e90f38e
·
verified ·
1 Parent(s): edfcf8d

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +213 -88
index.html CHANGED
@@ -23,6 +23,22 @@
23
  </div>
24
  </div>
25
  <div id="fps">FPS: 0.0</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  <video id="webcam" autoplay playsinline muted></video>
27
  <canvas id="overlay"></canvas>
28
  </div>
@@ -33,6 +49,11 @@
33
  <input type="range" id="threshold" min="0" max="1" step="0.01" value="0.5">
34
  <span id="thresh-val">0.50</span>
35
  </label>
 
 
 
 
 
36
  </div>
37
 
38
  <footer>
@@ -44,160 +65,264 @@
44
 
45
  const video = document.getElementById('webcam');
46
  const overlay = document.getElementById('overlay');
47
- const status = document.getElementById('status');
48
  const statusText = document.getElementById('status-text');
49
  const statusSub = document.getElementById('status-sub');
50
  const fpsElem = document.getElementById('fps');
51
  const slider = document.getElementById('threshold');
52
  const sliderVal = document.getElementById('thresh-val');
 
 
 
 
 
 
 
 
 
53
 
54
  let detector;
55
  let lastTime = performance.now();
56
  let threshold = 0.5;
57
-
 
 
 
58
  const inputCanvas = document.createElement('canvas');
59
  const inputCtx = inputCanvas.getContext('2d', { willReadFrequently: true });
60
-
 
61
  const COLORS = ['#3b82f6', '#ef4444', '#10b981', '#f59e0b', '#8b5cf6', '#ec4899'];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  slider.addEventListener('input', (e) => {
64
  threshold = parseFloat(e.target.value);
65
  sliderVal.textContent = threshold.toFixed(2);
66
  });
67
-
68
- // Handle high DPI displays
 
 
 
 
 
69
  function resizeOverlay() {
70
- const width = video.clientWidth;
71
- const height = video.clientHeight;
72
  const dpr = window.devicePixelRatio || 1;
73
-
74
- overlay.width = width * dpr;
75
- overlay.height = height * dpr;
76
-
77
- const ctx = overlay.getContext('2d');
78
- ctx.scale(dpr, dpr);
79
-
80
  inputCanvas.width = video.videoWidth;
81
  inputCanvas.height = video.videoHeight;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
 
84
  window.addEventListener('resize', resizeOverlay);
85
 
86
- // 1. Start Camera
87
- try {
88
- const stream = await navigator.mediaDevices.getUserMedia({
89
- video: {
90
- facingMode: 'environment',
91
- width: { ideal: 640 },
92
- height: { ideal: 480 }
93
- },
94
- audio: false
95
- });
96
-
97
- video.srcObject = stream;
98
  await new Promise(r => video.onloadedmetadata = r);
99
  video.play();
100
  resizeOverlay();
 
 
 
 
 
 
 
 
 
 
 
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  } catch (e) {
103
- statusText.textContent = "Camera Error";
104
- statusSub.textContent = e.message;
105
- document.querySelector('.spinner').style.display = 'none';
106
  throw e;
107
  }
108
 
109
  // 2. Load Model
110
  statusText.textContent = "Loading Model...";
111
- statusSub.textContent = "Downloading RF-DETR Nano (fp32)";
112
-
113
  try {
114
- detector = await pipeline('object-detection', 'onnx-community/rfdetr_nano-ONNX', {
115
  device: 'webgpu',
116
  dtype: 'fp32',
117
  });
118
-
119
  // 3. Warmup
120
  statusText.textContent = "Compiling Shaders...";
121
  statusSub.textContent = "This may take a moment";
122
-
123
  inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);
124
  await detector(inputCanvas, { threshold: 0.5, percentage: true });
125
 
126
- status.style.opacity = '0';
127
- setTimeout(() => status.style.display = 'none', 300);
128
-
129
  } catch (e) {
130
- statusText.textContent = "Model Error";
131
- statusSub.textContent = e.message;
132
- document.querySelector('.spinner').style.display = 'none';
133
  throw e;
134
  }
135
 
136
  // 4. Render Loop
137
  async function loop() {
 
 
138
  const now = performance.now();
139
  const dt = now - lastTime;
140
  lastTime = now;
141
-
142
  if (dt > 0) {
143
  fpsElem.textContent = `FPS: ${(1000 / dt).toFixed(1)}`;
144
  }
145
 
146
  inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);
147
 
148
- const results = await detector(inputCanvas, {
149
- threshold: threshold,
150
- percentage: true
151
- });
152
  drawResults(results);
153
 
154
  requestAnimationFrame(loop);
155
  }
156
 
157
  function drawResults(results) {
158
- const ctx = overlay.getContext('2d');
159
- const w = video.clientWidth;
160
- const h = video.clientHeight;
161
-
162
- // Clear canvas
163
- ctx.save();
164
- ctx.setTransform(1, 0, 0, 1, 0, 0);
165
- ctx.clearRect(0, 0, overlay.width, overlay.height);
166
- ctx.restore();
167
-
168
- // Set styles common to all results
169
- ctx.font = '600 13px system-ui';
170
- ctx.lineWidth = 2.5;
171
-
172
- results.forEach((res, i) => {
173
- const { box, label, score } = res;
174
- const color = COLORS[i % COLORS.length];
175
-
176
- const x1 = box.xmin * w;
177
- const y1 = box.ymin * h;
178
- const width = (box.xmax - box.xmin) * w;
179
- const height = (box.ymax - box.ymin) * h;
180
-
181
- // Box
182
- ctx.strokeStyle = color;
183
- ctx.beginPath();
184
- ctx.roundRect(x1, y1, width, height, 6);
185
- ctx.stroke();
186
-
187
- // Label
188
- ctx.fillStyle = color;
189
- const text = `${label} ${(score*100).toFixed(0)}%`;
190
- const textMetrics = ctx.measureText(text);
191
- const textWidth = textMetrics.width;
192
- const textHeight = 22;
193
-
194
- ctx.beginPath();
195
- ctx.roundRect(x1, y1 - textHeight - 4, textWidth + 12, textHeight, 4);
196
- ctx.fill();
197
-
198
- ctx.fillStyle = 'white';
199
- ctx.fillText(text, x1 + 6, y1 - 9);
200
- });
201
  }
202
 
203
  requestAnimationFrame(loop);
 
23
  </div>
24
  </div>
25
  <div id="fps">FPS: 0.0</div>
26
+ <div id="source-toggle">
27
+ <button id="source-webcam" class="source-btn active" title="Webcam">
28
+ <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/><circle cx="12" cy="13" r="4"/></svg>
29
+ Webcam
30
+ </button>
31
+ <button id="source-file" class="source-btn" title="Video File">
32
+ <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="23 7 16 12 23 17 23 7"/><rect x="1" y="5" width="15" height="14" rx="2" ry="2"/></svg>
33
+ File
34
+ </button>
35
+ <button id="pause-btn" class="source-btn" title="Pause">
36
+ <svg id="pause-icon" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="6" y="4" width="4" height="16"/><rect x="14" y="4" width="4" height="16"/></svg>
37
+ <svg id="play-icon" xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="display:none"><polygon points="5 3 19 12 5 21 5 3"/></svg>
38
+ <span id="pause-label">Pause</span>
39
+ </button>
40
+ </div>
41
+ <input type="file" id="file-input" accept="video/*" hidden />
42
  <video id="webcam" autoplay playsinline muted></video>
43
  <canvas id="overlay"></canvas>
44
  </div>
 
49
  <input type="range" id="threshold" min="0" max="1" step="0.01" value="0.5">
50
  <span id="thresh-val">0.50</span>
51
  </label>
52
+ <div class="control-divider"></div>
53
+ <label class="control-label">
54
+ <span>Labels (COCO subset)</span>
55
+ <input type="text" id="allowed-labels" placeholder="e.g. person, car">
56
+ </label>
57
  </div>
58
 
59
  <footer>
 
65
 
66
// --- DOM references ---------------------------------------------------
const video = document.getElementById('webcam');
const overlay = document.getElementById('overlay');
const statusOverlay = document.getElementById('status');
const statusText = document.getElementById('status-text');
const statusSub = document.getElementById('status-sub');
const fpsElem = document.getElementById('fps');
const slider = document.getElementById('threshold');
const sliderVal = document.getElementById('thresh-val');
const btnWebcam = document.getElementById('source-webcam');
const btnFile = document.getElementById('source-file');
const fileInput = document.getElementById('file-input');
const spinner = document.querySelector('.spinner');
const allowedLabelsInput = document.getElementById('allowed-labels');
const pauseBtn = document.getElementById('pause-btn');
const pauseIcon = document.getElementById('pause-icon');
const playIcon = document.getElementById('play-icon');
const pauseLabel = document.getElementById('pause-label');

// --- Mutable app state ------------------------------------------------
let detector;                       // object-detection pipeline, set during model load
let lastTime = performance.now();   // timestamp of previous frame, for FPS
let threshold = 0.5;                // confidence threshold, driven by the slider
let allowedLabels = null;           // null = no filtering; otherwise a Set of lowercase labels
let paused = false;                 // when true, the render loop stops rescheduling itself
let webcamStream = null;            // cached MediaStream so the camera is opened only once

// --- Canvases ---------------------------------------------------------
// Offscreen canvas the current video frame is copied into before inference.
const inputCanvas = document.createElement('canvas');
const inputCtx = inputCanvas.getContext('2d', { willReadFrequently: true });
// On-screen context used for drawing detection boxes.
const overlayCtx = overlay.getContext('2d');

// --- Colors -----------------------------------------------------------
const COLORS = ['#3b82f6', '#ef4444', '#10b981', '#f59e0b', '#8b5cf6', '#ec4899'];
// Maps each seen label to a stable color from COLORS (assigned in order of first sighting).
const labelColorMap = new Map();
let nextColorIndex = 0;
98
+
99
// Return a stable color for a label, assigning the next palette entry
// (cycling through COLORS) the first time a label is seen.
function getColorForLabel(label) {
    const existing = labelColorMap.get(label);
    if (existing !== undefined) return existing;
    const color = COLORS[nextColorIndex++ % COLORS.length];
    labelColorMap.set(label, color);
    return color;
}
106
// Camera request preferences: prefer the rear camera and a modest
// resolution (these are "ideal" hints, not hard requirements).
const VIDEO_CONSTRAINTS = { facingMode: 'environment', width: { ideal: 640 }, height: { ideal: 480 } };

// Displayed video rect within the element (accounting for object-fit: contain);
// recomputed by resizeOverlay() and used to map normalized boxes to pixels.
let videoRect = { x: 0, y: 0, w: 0, h: 0 };
110
 
111
// Keep the confidence threshold in sync with the slider position.
slider.addEventListener('input', (e) => {
    const value = parseFloat(e.target.value);
    threshold = value;
    sliderVal.textContent = value.toFixed(2);
});

// Parse the comma-separated label filter; an empty box disables filtering.
allowedLabelsInput.addEventListener('input', (e) => {
    const raw = e.target.value.trim();
    if (!raw) {
        allowedLabels = null;
        return;
    }
    const names = raw
        .split(',')
        .map((s) => s.trim().toLowerCase())
        .filter(Boolean);
    allowedLabels = new Set(names);
});
120
+
121
+ // Handle high DPI displays and recompute video rect
122
// Size the overlay for the current layout and DPR, size the inference
// canvas to the native video resolution, and recompute the rectangle the
// video actually occupies inside its element (object-fit: contain).
function resizeOverlay() {
    const clientW = video.clientWidth;
    const clientH = video.clientHeight;
    const dpr = window.devicePixelRatio || 1;

    // Back the overlay with physical pixels; scale so drawing uses CSS px.
    overlay.width = clientW * dpr;
    overlay.height = clientH * dpr;
    overlayCtx.scale(dpr, dpr);

    // Inference input stays at the source's native resolution.
    inputCanvas.width = video.videoWidth;
    inputCanvas.height = video.videoHeight;

    // Letterbox math: which axis the video fills depends on aspect ratios.
    const srcW = video.videoWidth || clientW;
    const srcH = video.videoHeight || clientH;
    const srcRatio = srcW / srcH;
    const boxRatio = clientW / clientH;

    let drawW;
    let drawH;
    if (srcRatio > boxRatio) {
        // Wider than the container: full width, bars top/bottom.
        drawW = clientW;
        drawH = clientW / srcRatio;
    } else {
        // Taller than the container: full height, bars left/right.
        drawW = clientH * srcRatio;
        drawH = clientH;
    }

    videoRect = {
        x: (clientW - drawW) / 2,
        y: (clientH - drawH) / 2,
        w: drawW,
        h: drawH,
    };
}
149
 
150
// Recompute overlay sizing / video rect whenever the window layout changes.
window.addEventListener('resize', resizeOverlay);
151
 
152
+ // Wait for the video to be ready, then resize the overlay
153
// Wait for the video element to expose metadata (dimensions), start
// playback, and resize the overlay for the new source.
async function onVideoReady() {
    // If metadata is already available (e.g. switching back to a source
    // that was previously loaded), `loadedmetadata` never fires again;
    // waiting unconditionally would hang forever.
    if (video.readyState < HTMLMediaElement.HAVE_METADATA) {
        await new Promise(r => video.onloadedmetadata = r);
    }
    video.play();
    resizeOverlay();
}
158
+
159
// Leave the paused state: restore the "Pause" UI and restart the loop.
// No-op when already running.
function resume() {
    if (paused) {
        paused = false;
        pauseIcon.style.display = '';
        playIcon.style.display = 'none';
        pauseLabel.textContent = 'Pause';
        // Reset the FPS clock so the first frame after resuming isn't huge.
        lastTime = performance.now();
        requestAnimationFrame(loop);
    }
}
168
 
169
+ // Source switching
170
// Switch the <video> element to the live webcam stream.
// Rethrows getUserMedia failures so callers can report them — previously
// the error was swallowed here, which made the startup "Camera Error"
// handler unreachable.
async function switchToWebcam() {
    // Release any file-backed blob URL from a previous file source.
    if (video.src) {
        URL.revokeObjectURL(video.src);
        video.removeAttribute('src');
    }
    video.loop = false;

    try {
        // Open the camera lazily, then reuse the same stream on later switches.
        webcamStream ??= await navigator.mediaDevices.getUserMedia({ video: VIDEO_CONSTRAINTS, audio: false });
        video.srcObject = webcamStream;
        await onVideoReady();
        btnWebcam.classList.add('active');
        btnFile.classList.remove('active');
        resume();
    } catch (e) {
        console.error('Webcam error:', e);
        throw e; // let the caller decide how to surface this
    }
}
188
+
189
// Play a user-selected video file instead of the webcam.
async function switchToFile(file) {
    // Stop the camera so the light turns off while a file is playing.
    if (webcamStream) {
        webcamStream.getTracks().forEach(t => t.stop());
        webcamStream = null;
    }
    // Revoke the blob URL of any previously selected file; otherwise each
    // file switch leaks an object URL for the page's lifetime.
    if (video.src) {
        URL.revokeObjectURL(video.src);
    }
    video.srcObject = null;
    video.src = URL.createObjectURL(file);
    video.loop = true;
    video.muted = true;
    await onVideoReady();
    btnFile.classList.add('active');
    btnWebcam.classList.remove('active');
    resume();
}
203
+
204
// Source-selection wiring.
btnWebcam.addEventListener('click', switchToWebcam);
btnFile.addEventListener('click', () => fileInput.click());

fileInput.addEventListener('change', (e) => {
    const [file] = e.target.files;
    if (file) switchToFile(file);
    // Clear the input so picking the same file again still fires 'change'.
    fileInput.value = '';
});

// Toggle between running and paused; the loop itself checks `paused`.
pauseBtn.addEventListener('click', () => {
    if (paused) {
        resume();
        video.play();
        return;
    }
    paused = true;
    pauseIcon.style.display = 'none';
    playIcon.style.display = '';
    pauseLabel.textContent = 'Play';
    video.pause();
});
224
+
225
// Put the status overlay into a terminal error state: hide the spinner
// and show the given title/detail text.
function showError(title, message) {
    spinner.style.display = 'none';
    statusText.textContent = title;
    statusSub.textContent = message;
}
230
+
231
// 1. Start Camera — the app cannot proceed without a video source.
try {
    await switchToWebcam();
} catch (e) {
    // NOTE(review): switchToWebcam currently logs and swallows
    // getUserMedia errors internally, so verify this handler can
    // actually fire for camera failures.
    showError("Camera Error", e.message);
    throw e;
}

// 2. Load Model
statusText.textContent = "Loading Model...";
statusSub.textContent = "Downloading RF-DETR Medium (fp32)";

try {
    // WebGPU-backed RF-DETR object-detection pipeline (fp32 weights).
    detector = await pipeline('object-detection', 'onnx-community/rfdetr_medium-ONNX', {
        device: 'webgpu',
        dtype: 'fp32',
    });

    // 3. Warmup — run one inference now so shader compilation does not
    // stall the first visible frame of the render loop.
    statusText.textContent = "Compiling Shaders...";
    statusSub.textContent = "This may take a moment";

    inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);
    await detector(inputCanvas, { threshold: 0.5, percentage: true });

    // Fade the status overlay out, then remove it from layout after the
    // 300ms transition completes.
    statusOverlay.style.opacity = '0';
    setTimeout(() => statusOverlay.style.display = 'none', 300);
} catch (e) {
    showError("Model Error", e.message);
    throw e;
}
262
 
263
  // 4. Render Loop
264
// 4. Render Loop — grab a frame, run detection, draw results, repeat.
// Stops rescheduling itself while paused; resume() restarts it.
async function loop() {
    if (paused) return;

    // FPS from the wall-clock delta between consecutive frames.
    const now = performance.now();
    const elapsed = now - lastTime;
    lastTime = now;
    if (elapsed > 0) {
        fpsElem.textContent = `FPS: ${(1000 / elapsed).toFixed(1)}`;
    }

    // Snapshot the current video frame at native resolution for inference.
    inputCtx.drawImage(video, 0, 0, inputCanvas.width, inputCanvas.height);

    const detections = await detector(inputCanvas, { threshold, percentage: true });
    const visible = allowedLabels
        ? detections.filter(({ label }) => allowedLabels.has(label.toLowerCase()))
        : detections;
    drawResults(visible);

    requestAnimationFrame(loop);
}
285
 
286
// Render detection results onto the overlay, mapping normalized boxes
// into the visible (object-fit: contain) region of the video element.
function drawResults(results) {
    const { x: vx, y: vy, w, h } = videoRect;

    // Clear the full physical canvas with an identity transform, then
    // restore the DPR scale so all drawing below uses CSS pixels.
    overlayCtx.setTransform(1, 0, 0, 1, 0, 0);
    overlayCtx.clearRect(0, 0, overlay.width, overlay.height);
    const dpr = window.devicePixelRatio || 1;
    overlayCtx.setTransform(dpr, 0, 0, dpr, 0, 0);

    // Styles shared by every result.
    overlayCtx.font = '600 13px system-ui';
    overlayCtx.lineWidth = 2.5;

    for (const { box, label, score } of results) {
        const color = getColorForLabel(label);

        // Boxes arrive as fractions of the frame (percentage: true);
        // scale them into the displayed video rect.
        const x1 = vx + box.xmin * w;
        const y1 = vy + box.ymin * h;
        const bw = (box.xmax - box.xmin) * w;
        const bh = (box.ymax - box.ymin) * h;

        // Bounding box
        overlayCtx.strokeStyle = color;
        overlayCtx.beginPath();
        overlayCtx.roundRect(x1, y1, bw, bh, 6);
        overlayCtx.stroke();

        // Label tag — normally above the box, but flipped just inside it
        // when the box touches the top of the canvas (otherwise the tag
        // was drawn off-canvas and clipped).
        const text = `${label} ${(score * 100).toFixed(0)}%`;
        const textWidth = overlayCtx.measureText(text).width;
        const tagHeight = 22;
        const aboveY = y1 - tagHeight - 4;
        const tagY = aboveY >= 0 ? aboveY : y1 + 4;

        overlayCtx.fillStyle = color;
        overlayCtx.beginPath();
        overlayCtx.roundRect(x1, tagY, textWidth + 12, tagHeight, 4);
        overlayCtx.fill();

        // Label text (baseline matches the original placement when the
        // tag sits above the box).
        overlayCtx.fillStyle = 'white';
        overlayCtx.fillText(text, x1 + 6, tagY + 17);
    }
}
327
 
328
// Kick off the render loop; camera and model are ready by this point.
requestAnimationFrame(loop);