cduss Claude Opus 4.6 (1M context) committed on
Commit
ab3cfe1
·
1 Parent(s): f49c6c8

Rewrite as 1Hz detect-then-move loop to fix feedback oscillation

Browse files

The camera is on the robot's head, so moving the head changes where
the hand appears in the frame. At high frequency (5Hz+), this creates
a feedback loop of conflicting commands causing chaotic movement.

New architecture:
- Control loop at 1Hz: one fresh detection + one lookAtImage(u,v,0.8)
- Head smoothly interpolates over 0.8s, settles 0.2s before next tick
- Render loop (every frame) only draws hand skeleton overlay, no control
- No EMA smoothing needed — single sample per second is stable enough

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. app.js +70 -106
  2. style.css +1 -2
app.js CHANGED
@@ -19,7 +19,7 @@ function checkMinVersion(ver, min) {
19
  if ((v[i] || 0) > (m[i] || 0)) return true;
20
  if ((v[i] || 0) < (m[i] || 0)) return false;
21
  }
22
- return true; // equal
23
  }
24
 
25
  // ─── State ───
@@ -29,23 +29,21 @@ let detachVideo = null;
29
  let selectedRobotId = null;
30
  let controlTimer = null;
31
 
32
- // ─── TEST MODE: click on video to send look_at_image ───
33
- const TEST_MODE = true; // set to false to re-enable hand tracking control
34
- let clickMarker = null; // {x, y} in video pixels
 
35
 
36
- // Tracking state — shared between render loop and control loop
37
  let trackingEnabled = true;
38
- let handPixelX = NaN, handPixelY = NaN; // latest smoothed hand position in pixels
39
- let lastDetectTs = -1;
40
- let frameCount = 0;
41
- let fpsTime = 0;
42
  let noHandTicks = 0;
 
43
 
44
- // Tuning params
45
- let smoothing = 0.60;
46
- const CONTROL_HZ = 5;
47
- const CONTROL_INTERVAL = 1000 / CONTROL_HZ; // 200ms
48
- const NO_HAND_RETURN_TICKS = 30; // at 5Hz = 6 seconds
49
 
50
  // Canvas sync cache
51
  let cachedVideoW = 0, cachedVideoH = 0;
@@ -217,7 +215,7 @@ function updateTrackBadge() {
217
  if (!trackingEnabled) {
218
  b.className = 'badge tracking-disabled';
219
  b.textContent = 'Tracking off';
220
- } else if (isNaN(handPixelX)) {
221
  b.className = 'badge tracking-searching';
222
  b.textContent = 'Searching...';
223
  } else {
@@ -229,10 +227,6 @@ function updateTrackBadge() {
229
  // ─── Tracking toggle ───
230
  $('trackingToggle').addEventListener('change', (e) => {
231
  trackingEnabled = e.target.checked;
232
- if (trackingEnabled) {
233
- handPixelX = NaN;
234
- handPixelY = NaN;
235
- }
236
  updateTrackBadge();
237
  if (!trackingEnabled) clearCanvas();
238
  });
@@ -244,28 +238,9 @@ $('tuneBtn').addEventListener('click', () => {
244
 
245
  function initTuning() {
246
  const ss = $('smoothSlider');
247
- ss.addEventListener('input', () => { smoothing = +ss.value / 100; $('smoothVal').textContent = smoothing.toFixed(2); });
248
  }
249
 
250
- // ─── TEST MODE: click-to-look-at ───
251
- canvas.addEventListener('click', (e) => {
252
- if (!TEST_MODE || robot.state !== 'streaming') return;
253
- const vw = video.videoWidth, vh = video.videoHeight;
254
- if (!vw || !vh) return;
255
-
256
- // Convert click position (on canvas element) to video pixel coordinates
257
- const rect = canvas.getBoundingClientRect();
258
- const u = (e.clientX - rect.left) / rect.width * vw;
259
- const v = (e.clientY - rect.top) / rect.height * vh;
260
-
261
- clickMarker = { x: u, y: v };
262
- console.log(`look_at_image(${Math.round(u)}, ${Math.round(v)}) — video: ${vw}x${vh}`);
263
- robot.lookAtImage(u, v);
264
-
265
- // Update HUD
266
- $('headBadge').textContent = `click: ${Math.round(u)},${Math.round(v)}`;
267
- });
268
-
269
  // ─── MediaPipe detector ───
270
  async function initDetector() {
271
  if (detector) {
@@ -296,7 +271,8 @@ async function initDetector() {
296
  }
297
 
298
  // ═══════════════════════════════════════════════════════════════
299
- // RENDER LOOP — runs every video frame, does detection + drawing
 
300
  // ═══════════════════════════════════════════════════════════════
301
 
302
  function startTracking() {
@@ -352,76 +328,56 @@ function renderFrame(now) {
352
  fpsTime = now;
353
  }
354
 
355
- // Run detection
356
- let results = null;
357
- if (detector && trackingEnabled && now > lastDetectTs) {
358
  try {
359
- results = detector.detectForVideo(video, now);
360
- lastDetectTs = now;
361
  } catch (_) {}
362
  }
363
 
364
- // Clear & draw
365
  ctx.clearRect(0, 0, canvas.width, canvas.height);
366
  if (!trackingEnabled) return;
367
 
368
  const w = canvas.width, h = canvas.height;
369
  drawCrosshair(w, h);
370
 
371
- // Draw click marker in test mode
372
- if (TEST_MODE && clickMarker) {
373
  ctx.beginPath();
374
- ctx.arc(clickMarker.x, clickMarker.y, 12, 0, Math.PI * 2);
375
  ctx.strokeStyle = 'rgba(255, 50, 50, 0.9)';
376
  ctx.lineWidth = 3;
377
  ctx.stroke();
378
- ctx.beginPath();
379
- ctx.moveTo(clickMarker.x - 18, clickMarker.y);
380
- ctx.lineTo(clickMarker.x + 18, clickMarker.y);
381
- ctx.moveTo(clickMarker.x, clickMarker.y - 18);
382
- ctx.lineTo(clickMarker.x, clickMarker.y + 18);
383
- ctx.strokeStyle = 'rgba(255, 50, 50, 0.6)';
384
- ctx.lineWidth = 1.5;
385
- ctx.stroke();
386
  }
387
 
388
- if (results && results.landmarks && results.landmarks.length > 0) {
389
- const hand = results.landmarks[0];
 
390
  drawHand(hand, w, h);
391
 
392
- // Compute centroid in normalized coords (0-1)
393
  let cx = 0, cy = 0;
394
  for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
395
  cx /= hand.length;
396
  cy /= hand.length;
397
 
398
- // Draw centroid
399
  ctx.beginPath();
400
  ctx.arc(cx * w, cy * h, 8, 0, Math.PI * 2);
401
  ctx.fillStyle = 'rgba(255, 107, 53, 0.8)';
402
  ctx.fill();
403
-
404
- // Update shared hand position with EMA smoothing (in pixels)
405
- const alpha = 1 - smoothing;
406
- const rawPixelX = cx * w;
407
- const rawPixelY = cy * h;
408
- if (isNaN(handPixelX)) { handPixelX = rawPixelX; handPixelY = rawPixelY; }
409
- else { handPixelX = alpha * rawPixelX + smoothing * handPixelX; handPixelY = alpha * rawPixelY + smoothing * handPixelY; }
410
-
411
- // Draw error line from center to smoothed position
412
- ctx.beginPath();
413
- ctx.moveTo(0.5 * w, 0.5 * h);
414
- ctx.lineTo(handPixelX, handPixelY);
415
- ctx.strokeStyle = 'rgba(255, 107, 53, 0.4)';
416
- ctx.lineWidth = 2;
417
- ctx.setLineDash([6, 4]);
418
- ctx.stroke();
419
- ctx.setLineDash([]);
420
  }
421
  }
422
 
423
  // ═══════════════════════════════════════════════════════════════
424
- // CONTROL LOOP — fixed 5 Hz, sends look_at_image commands
 
 
 
 
 
 
 
425
  // ═══════════════════════════════════════════════════════════════
426
 
427
  function startControl() {
@@ -435,40 +391,47 @@ function stopControl() {
435
 
436
  function controlTick() {
437
  if (robot.state !== 'streaming') { stopControl(); return; }
 
438
 
439
- // In test mode, control loop only updates latency — clicks send commands directly
440
- if (TEST_MODE) {
441
- const now = performance.now();
442
- if (now - lastLatencyTime >= 2000) {
443
- lastLatencyTime = now;
444
- updateLatencyBadge();
445
- }
446
- return;
447
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
- if (!trackingEnabled || isNaN(handPixelX)) {
 
 
450
  noHandTicks++;
451
- // After timeout with no hand, look at center
452
  if (noHandTicks > NO_HAND_RETURN_TICKS) {
453
- const w = video.videoWidth, h = video.videoHeight;
454
- if (w && h) {
455
- robot.lookAtImage(w / 2, h / 2);
456
- }
457
  }
458
- } else {
459
- noHandTicks = 0;
460
- // Send pixel coordinates — the daemon handles all the math
461
- robot.lookAtImage(handPixelX, handPixelY);
462
  }
463
 
464
- // Update HUD
465
  updateTrackBadge();
466
- if (!isNaN(handPixelX)) {
467
- $('headBadge').textContent = `px:${Math.round(handPixelX)},${Math.round(handPixelY)}`;
468
- }
469
 
470
- // Latency (every 2s)
471
- const now = performance.now();
472
  if (now - lastLatencyTime >= 2000) {
473
  lastLatencyTime = now;
474
  updateLatencyBadge();
@@ -514,8 +477,9 @@ function clearCanvas() {
514
  }
515
 
516
  function resetTrackingState() {
517
- handPixelX = NaN; handPixelY = NaN;
518
- lastDetectTs = -1;
 
519
  noHandTicks = 0;
520
  }
521
 
 
19
  if ((v[i] || 0) > (m[i] || 0)) return true;
20
  if ((v[i] || 0) < (m[i] || 0)) return false;
21
  }
22
+ return true;
23
  }
24
 
25
  // ─── State ───
 
29
  let selectedRobotId = null;
30
  let controlTimer = null;
31
 
32
+ // Control config
33
+ const CONTROL_INTERVAL = 1000; // 1 Hz one detect + one move per second
34
+ const MOVE_DURATION = 0.8; // smooth interpolation over 0.8s (leaves 0.2s settle)
35
+ const NO_HAND_RETURN_TICKS = 5; // 5 seconds without hand → return to center
36
 
37
+ // Tracking state
38
  let trackingEnabled = true;
 
 
 
 
39
  let noHandTicks = 0;
40
+ let lastTarget = null; // {u, v} — last sent target for HUD display
41
 
42
+ // Render-only state (for visual overlay between control ticks)
43
+ let lastRenderDetectTs = -1;
44
+ let lastRenderResults = null;
45
+ let frameCount = 0;
46
+ let fpsTime = 0;
47
 
48
  // Canvas sync cache
49
  let cachedVideoW = 0, cachedVideoH = 0;
 
215
  if (!trackingEnabled) {
216
  b.className = 'badge tracking-disabled';
217
  b.textContent = 'Tracking off';
218
+ } else if (!lastTarget) {
219
  b.className = 'badge tracking-searching';
220
  b.textContent = 'Searching...';
221
  } else {
 
227
  // ─── Tracking toggle ───
228
  $('trackingToggle').addEventListener('change', (e) => {
229
  trackingEnabled = e.target.checked;
 
 
 
 
230
  updateTrackBadge();
231
  if (!trackingEnabled) clearCanvas();
232
  });
 
238
 
239
  function initTuning() {
240
  const ss = $('smoothSlider');
241
+ ss.addEventListener('input', () => { $('smoothVal').textContent = (+ss.value / 100).toFixed(2); });
242
  }
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  // ─── MediaPipe detector ───
245
  async function initDetector() {
246
  if (detector) {
 
271
  }
272
 
273
  // ═══════════════════════════════════════════════════════════════
274
+ // RENDER LOOP — visual overlay only (no control, just drawing)
275
+ // Runs every video frame for smooth FPS counter + hand skeleton
276
  // ═══════════════════════════════════════════════════════════════
277
 
278
  function startTracking() {
 
328
  fpsTime = now;
329
  }
330
 
331
+ // Run detection for visual overlay (doesn't affect control)
332
+ if (detector && trackingEnabled && now > lastRenderDetectTs) {
 
333
  try {
334
+ lastRenderResults = detector.detectForVideo(video, now);
335
+ lastRenderDetectTs = now;
336
  } catch (_) {}
337
  }
338
 
339
+ // Draw
340
  ctx.clearRect(0, 0, canvas.width, canvas.height);
341
  if (!trackingEnabled) return;
342
 
343
  const w = canvas.width, h = canvas.height;
344
  drawCrosshair(w, h);
345
 
346
+ // Draw last sent target
347
+ if (lastTarget) {
348
  ctx.beginPath();
349
+ ctx.arc(lastTarget.u, lastTarget.v, 12, 0, Math.PI * 2);
350
  ctx.strokeStyle = 'rgba(255, 50, 50, 0.9)';
351
  ctx.lineWidth = 3;
352
  ctx.stroke();
 
 
 
 
 
 
 
 
353
  }
354
 
355
+ // Draw hand skeleton from latest render detection
356
+ if (lastRenderResults && lastRenderResults.landmarks && lastRenderResults.landmarks.length > 0) {
357
+ const hand = lastRenderResults.landmarks[0];
358
  drawHand(hand, w, h);
359
 
 
360
  let cx = 0, cy = 0;
361
  for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
362
  cx /= hand.length;
363
  cy /= hand.length;
364
 
 
365
  ctx.beginPath();
366
  ctx.arc(cx * w, cy * h, 8, 0, Math.PI * 2);
367
  ctx.fillStyle = 'rgba(255, 107, 53, 0.8)';
368
  ctx.fill();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
  }
371
 
372
  // ═══════════════════════════════════════════════════════════════
373
+ // CONTROL LOOP — 1 Hz: one detection + one movement per tick
374
+ //
375
+ // Why 1 Hz? The camera is on the robot's head. When we send
376
+ // look_at_image, the head moves, which moves the camera, which
377
+ // changes where the hand appears in the frame. At high frequency
378
+ // this creates a feedback loop of conflicting commands. At 1 Hz
379
+ // with duration=0.8s, the head smoothly moves to the target and
380
+ // has 0.2s to settle before the next detection.
381
  // ═══════════════════════════════════════════════════════════════
382
 
383
  function startControl() {
 
391
 
392
  function controlTick() {
393
  if (robot.state !== 'streaming') { stopControl(); return; }
394
+ if (!trackingEnabled || !detector) return;
395
 
396
+ const w = video.videoWidth, h = video.videoHeight;
397
+ if (!w || !h || video.readyState < 2) return;
398
+
399
+ // Run a fresh detection for control (independent of render loop)
400
+ let results = null;
401
+ const now = performance.now();
402
+ try {
403
+ results = detector.detectForVideo(video, now);
404
+ } catch (_) {}
405
+
406
+ if (results && results.landmarks && results.landmarks.length > 0) {
407
+ noHandTicks = 0;
408
+ const hand = results.landmarks[0];
409
+
410
+ // Compute centroid in pixels
411
+ let cx = 0, cy = 0;
412
+ for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
413
+ cx /= hand.length;
414
+ cy /= hand.length;
415
+ const u = cx * w;
416
+ const v = cy * h;
417
+
418
+ lastTarget = { u, v };
419
+ robot.lookAtImage(u, v, MOVE_DURATION);
420
 
421
+ // HUD
422
+ $('headBadge').textContent = `px:${Math.round(u)},${Math.round(v)}`;
423
+ } else {
424
  noHandTicks++;
 
425
  if (noHandTicks > NO_HAND_RETURN_TICKS) {
426
+ lastTarget = null;
427
+ robot.lookAtImage(w / 2, h / 2, MOVE_DURATION);
428
+ $('headBadge').textContent = 'center';
 
429
  }
 
 
 
 
430
  }
431
 
 
432
  updateTrackBadge();
 
 
 
433
 
434
+ // Latency (every ~2 ticks)
 
435
  if (now - lastLatencyTime >= 2000) {
436
  lastLatencyTime = now;
437
  updateLatencyBadge();
 
477
  }
478
 
479
  function resetTrackingState() {
480
+ lastTarget = null;
481
+ lastRenderDetectTs = -1;
482
+ lastRenderResults = null;
483
  noHandTicks = 0;
484
  }
485
 
style.css CHANGED
@@ -160,8 +160,7 @@ main {
160
 
161
  .video-container canvas {
162
  position: absolute;
163
- pointer-events: auto;
164
- cursor: crosshair;
165
  }
166
 
167
  /* ── HUD badges ── */
 
160
 
161
  .video-container canvas {
162
  position: absolute;
163
+ pointer-events: none;
 
164
  }
165
 
166
  /* ── HUD badges ── */