Rewrite as 1Hz detect-then-move loop to fix feedback oscillation
Browse files
The camera is on the robot's head, so moving the head changes where
the hand appears in the frame. At high frequency (5Hz+), this creates
a feedback loop of conflicting commands causing chaotic movement.
New architecture:
- Control loop at 1Hz: one fresh detection + one lookAtImage(u,v,0.8)
- Head smoothly interpolates over 0.8s, settles 0.2s before next tick
- Render loop (every frame) runs its own detection purely to draw the overlay (hand skeleton, last sent target); it sends no control commands
- No EMA smoothing needed — single sample per second is stable enough
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
app.js
CHANGED
|
@@ -19,7 +19,7 @@ function checkMinVersion(ver, min) {
|
|
| 19 |
if ((v[i] || 0) > (m[i] || 0)) return true;
|
| 20 |
if ((v[i] || 0) < (m[i] || 0)) return false;
|
| 21 |
}
|
| 22 |
-
return true;
|
| 23 |
}
|
| 24 |
|
| 25 |
// ─── State ───
|
|
@@ -29,23 +29,21 @@ let detachVideo = null;
|
|
| 29 |
let selectedRobotId = null;
|
| 30 |
let controlTimer = null;
|
| 31 |
|
| 32 |
-
//
|
| 33 |
-
const
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
-
// Tracking state
|
| 37 |
let trackingEnabled = true;
|
| 38 |
-
let handPixelX = NaN, handPixelY = NaN; // latest smoothed hand position in pixels
|
| 39 |
-
let lastDetectTs = -1;
|
| 40 |
-
let frameCount = 0;
|
| 41 |
-
let fpsTime = 0;
|
| 42 |
let noHandTicks = 0;
|
|
|
|
| 43 |
|
| 44 |
-
//
|
| 45 |
-
let
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
|
| 50 |
// Canvas sync cache
|
| 51 |
let cachedVideoW = 0, cachedVideoH = 0;
|
|
@@ -217,7 +215,7 @@ function updateTrackBadge() {
|
|
| 217 |
if (!trackingEnabled) {
|
| 218 |
b.className = 'badge tracking-disabled';
|
| 219 |
b.textContent = 'Tracking off';
|
| 220 |
-
} else if (
|
| 221 |
b.className = 'badge tracking-searching';
|
| 222 |
b.textContent = 'Searching...';
|
| 223 |
} else {
|
|
@@ -229,10 +227,6 @@ function updateTrackBadge() {
|
|
| 229 |
// ─── Tracking toggle ───
|
| 230 |
$('trackingToggle').addEventListener('change', (e) => {
|
| 231 |
trackingEnabled = e.target.checked;
|
| 232 |
-
if (trackingEnabled) {
|
| 233 |
-
handPixelX = NaN;
|
| 234 |
-
handPixelY = NaN;
|
| 235 |
-
}
|
| 236 |
updateTrackBadge();
|
| 237 |
if (!trackingEnabled) clearCanvas();
|
| 238 |
});
|
|
@@ -244,28 +238,9 @@ $('tuneBtn').addEventListener('click', () => {
|
|
| 244 |
|
| 245 |
function initTuning() {
|
| 246 |
const ss = $('smoothSlider');
|
| 247 |
-
ss.addEventListener('input', () => {
|
| 248 |
}
|
| 249 |
|
| 250 |
-
// ─── TEST MODE: click-to-look-at ───
|
| 251 |
-
canvas.addEventListener('click', (e) => {
|
| 252 |
-
if (!TEST_MODE || robot.state !== 'streaming') return;
|
| 253 |
-
const vw = video.videoWidth, vh = video.videoHeight;
|
| 254 |
-
if (!vw || !vh) return;
|
| 255 |
-
|
| 256 |
-
// Convert click position (on canvas element) to video pixel coordinates
|
| 257 |
-
const rect = canvas.getBoundingClientRect();
|
| 258 |
-
const u = (e.clientX - rect.left) / rect.width * vw;
|
| 259 |
-
const v = (e.clientY - rect.top) / rect.height * vh;
|
| 260 |
-
|
| 261 |
-
clickMarker = { x: u, y: v };
|
| 262 |
-
console.log(`look_at_image(${Math.round(u)}, ${Math.round(v)}) — video: ${vw}x${vh}`);
|
| 263 |
-
robot.lookAtImage(u, v);
|
| 264 |
-
|
| 265 |
-
// Update HUD
|
| 266 |
-
$('headBadge').textContent = `click: ${Math.round(u)},${Math.round(v)}`;
|
| 267 |
-
});
|
| 268 |
-
|
| 269 |
// ─── MediaPipe detector ───
|
| 270 |
async function initDetector() {
|
| 271 |
if (detector) {
|
|
@@ -296,7 +271,8 @@ async function initDetector() {
|
|
| 296 |
}
|
| 297 |
|
| 298 |
// ═══════════════════════════════════════════════════════════════
|
| 299 |
-
// RENDER LOOP —
|
|
|
|
| 300 |
// ═══════════════════════════════════════════════════════════════
|
| 301 |
|
| 302 |
function startTracking() {
|
|
@@ -352,76 +328,56 @@ function renderFrame(now) {
|
|
| 352 |
fpsTime = now;
|
| 353 |
}
|
| 354 |
|
| 355 |
-
// Run detection
|
| 356 |
-
|
| 357 |
-
if (detector && trackingEnabled && now > lastDetectTs) {
|
| 358 |
try {
|
| 359 |
-
|
| 360 |
-
|
| 361 |
} catch (_) {}
|
| 362 |
}
|
| 363 |
|
| 364 |
-
//
|
| 365 |
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
| 366 |
if (!trackingEnabled) return;
|
| 367 |
|
| 368 |
const w = canvas.width, h = canvas.height;
|
| 369 |
drawCrosshair(w, h);
|
| 370 |
|
| 371 |
-
// Draw
|
| 372 |
-
if (
|
| 373 |
ctx.beginPath();
|
| 374 |
-
ctx.arc(
|
| 375 |
ctx.strokeStyle = 'rgba(255, 50, 50, 0.9)';
|
| 376 |
ctx.lineWidth = 3;
|
| 377 |
ctx.stroke();
|
| 378 |
-
ctx.beginPath();
|
| 379 |
-
ctx.moveTo(clickMarker.x - 18, clickMarker.y);
|
| 380 |
-
ctx.lineTo(clickMarker.x + 18, clickMarker.y);
|
| 381 |
-
ctx.moveTo(clickMarker.x, clickMarker.y - 18);
|
| 382 |
-
ctx.lineTo(clickMarker.x, clickMarker.y + 18);
|
| 383 |
-
ctx.strokeStyle = 'rgba(255, 50, 50, 0.6)';
|
| 384 |
-
ctx.lineWidth = 1.5;
|
| 385 |
-
ctx.stroke();
|
| 386 |
}
|
| 387 |
|
| 388 |
-
|
| 389 |
-
|
|
|
|
| 390 |
drawHand(hand, w, h);
|
| 391 |
|
| 392 |
-
// Compute centroid in normalized coords (0-1)
|
| 393 |
let cx = 0, cy = 0;
|
| 394 |
for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
|
| 395 |
cx /= hand.length;
|
| 396 |
cy /= hand.length;
|
| 397 |
|
| 398 |
-
// Draw centroid
|
| 399 |
ctx.beginPath();
|
| 400 |
ctx.arc(cx * w, cy * h, 8, 0, Math.PI * 2);
|
| 401 |
ctx.fillStyle = 'rgba(255, 107, 53, 0.8)';
|
| 402 |
ctx.fill();
|
| 403 |
-
|
| 404 |
-
// Update shared hand position with EMA smoothing (in pixels)
|
| 405 |
-
const alpha = 1 - smoothing;
|
| 406 |
-
const rawPixelX = cx * w;
|
| 407 |
-
const rawPixelY = cy * h;
|
| 408 |
-
if (isNaN(handPixelX)) { handPixelX = rawPixelX; handPixelY = rawPixelY; }
|
| 409 |
-
else { handPixelX = alpha * rawPixelX + smoothing * handPixelX; handPixelY = alpha * rawPixelY + smoothing * handPixelY; }
|
| 410 |
-
|
| 411 |
-
// Draw error line from center to smoothed position
|
| 412 |
-
ctx.beginPath();
|
| 413 |
-
ctx.moveTo(0.5 * w, 0.5 * h);
|
| 414 |
-
ctx.lineTo(handPixelX, handPixelY);
|
| 415 |
-
ctx.strokeStyle = 'rgba(255, 107, 53, 0.4)';
|
| 416 |
-
ctx.lineWidth = 2;
|
| 417 |
-
ctx.setLineDash([6, 4]);
|
| 418 |
-
ctx.stroke();
|
| 419 |
-
ctx.setLineDash([]);
|
| 420 |
}
|
| 421 |
}
|
| 422 |
|
| 423 |
// ═══════════════════════════════════════════════════════════════
|
| 424 |
-
// CONTROL LOOP —
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
// ═══════════════════════════════════════════════════════════════
|
| 426 |
|
| 427 |
function startControl() {
|
|
@@ -435,40 +391,47 @@ function stopControl() {
|
|
| 435 |
|
| 436 |
function controlTick() {
|
| 437 |
if (robot.state !== 'streaming') { stopControl(); return; }
|
|
|
|
| 438 |
|
| 439 |
-
|
| 440 |
-
if (
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
-
|
|
|
|
|
|
|
| 450 |
noHandTicks++;
|
| 451 |
-
// After timeout with no hand, look at center
|
| 452 |
if (noHandTicks > NO_HAND_RETURN_TICKS) {
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
}
|
| 457 |
}
|
| 458 |
-
} else {
|
| 459 |
-
noHandTicks = 0;
|
| 460 |
-
// Send pixel coordinates — the daemon handles all the math
|
| 461 |
-
robot.lookAtImage(handPixelX, handPixelY);
|
| 462 |
}
|
| 463 |
|
| 464 |
-
// Update HUD
|
| 465 |
updateTrackBadge();
|
| 466 |
-
if (!isNaN(handPixelX)) {
|
| 467 |
-
$('headBadge').textContent = `px:${Math.round(handPixelX)},${Math.round(handPixelY)}`;
|
| 468 |
-
}
|
| 469 |
|
| 470 |
-
// Latency (every
|
| 471 |
-
const now = performance.now();
|
| 472 |
if (now - lastLatencyTime >= 2000) {
|
| 473 |
lastLatencyTime = now;
|
| 474 |
updateLatencyBadge();
|
|
@@ -514,8 +477,9 @@ function clearCanvas() {
|
|
| 514 |
}
|
| 515 |
|
| 516 |
function resetTrackingState() {
|
| 517 |
-
|
| 518 |
-
|
|
|
|
| 519 |
noHandTicks = 0;
|
| 520 |
}
|
| 521 |
|
|
|
|
| 19 |
if ((v[i] || 0) > (m[i] || 0)) return true;
|
| 20 |
if ((v[i] || 0) < (m[i] || 0)) return false;
|
| 21 |
}
|
| 22 |
+
return true;
|
| 23 |
}
|
| 24 |
|
| 25 |
// ─── State ───
|
|
|
|
| 29 |
let selectedRobotId = null;
|
| 30 |
let controlTimer = null;
|
| 31 |
|
| 32 |
+
// Control config
|
| 33 |
+
const CONTROL_INTERVAL = 1000; // 1 Hz — one detect + one move per second
|
| 34 |
+
const MOVE_DURATION = 0.8; // smooth interpolation over 0.8s (leaves 0.2s settle)
|
| 35 |
+
const NO_HAND_RETURN_TICKS = 5; // 5 seconds without hand → return to center
|
| 36 |
|
| 37 |
+
// Tracking state
|
| 38 |
let trackingEnabled = true;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
let noHandTicks = 0;
|
| 40 |
+
let lastTarget = null; // {u, v} — last sent target for HUD display
|
| 41 |
|
| 42 |
+
// Render-only state (for visual overlay between control ticks)
|
| 43 |
+
let lastRenderDetectTs = -1;
|
| 44 |
+
let lastRenderResults = null;
|
| 45 |
+
let frameCount = 0;
|
| 46 |
+
let fpsTime = 0;
|
| 47 |
|
| 48 |
// Canvas sync cache
|
| 49 |
let cachedVideoW = 0, cachedVideoH = 0;
|
|
|
|
| 215 |
if (!trackingEnabled) {
|
| 216 |
b.className = 'badge tracking-disabled';
|
| 217 |
b.textContent = 'Tracking off';
|
| 218 |
+
} else if (!lastTarget) {
|
| 219 |
b.className = 'badge tracking-searching';
|
| 220 |
b.textContent = 'Searching...';
|
| 221 |
} else {
|
|
|
|
| 227 |
// ─── Tracking toggle ───
|
| 228 |
$('trackingToggle').addEventListener('change', (e) => {
|
| 229 |
trackingEnabled = e.target.checked;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
updateTrackBadge();
|
| 231 |
if (!trackingEnabled) clearCanvas();
|
| 232 |
});
|
|
|
|
| 238 |
|
| 239 |
function initTuning() {
|
| 240 |
const ss = $('smoothSlider');
|
| 241 |
+
ss.addEventListener('input', () => { $('smoothVal').textContent = (+ss.value / 100).toFixed(2); });
|
| 242 |
}
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
// ─── MediaPipe detector ───
|
| 245 |
async function initDetector() {
|
| 246 |
if (detector) {
|
|
|
|
| 271 |
}
|
| 272 |
|
| 273 |
// ═══════════════════════════════════════════════════════════════
|
| 274 |
+
// RENDER LOOP — visual overlay only (no control, just drawing)
|
| 275 |
+
// Runs every video frame for smooth FPS counter + hand skeleton
|
| 276 |
// ═══════════════════════════════════════════════════════════════
|
| 277 |
|
| 278 |
function startTracking() {
|
|
|
|
| 328 |
fpsTime = now;
|
| 329 |
}
|
| 330 |
|
| 331 |
+
// Run detection for visual overlay (doesn't affect control)
|
| 332 |
+
if (detector && trackingEnabled && now > lastRenderDetectTs) {
|
|
|
|
| 333 |
try {
|
| 334 |
+
lastRenderResults = detector.detectForVideo(video, now);
|
| 335 |
+
lastRenderDetectTs = now;
|
| 336 |
} catch (_) {}
|
| 337 |
}
|
| 338 |
|
| 339 |
+
// Draw
|
| 340 |
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
| 341 |
if (!trackingEnabled) return;
|
| 342 |
|
| 343 |
const w = canvas.width, h = canvas.height;
|
| 344 |
drawCrosshair(w, h);
|
| 345 |
|
| 346 |
+
// Draw last sent target
|
| 347 |
+
if (lastTarget) {
|
| 348 |
ctx.beginPath();
|
| 349 |
+
ctx.arc(lastTarget.u, lastTarget.v, 12, 0, Math.PI * 2);
|
| 350 |
ctx.strokeStyle = 'rgba(255, 50, 50, 0.9)';
|
| 351 |
ctx.lineWidth = 3;
|
| 352 |
ctx.stroke();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
}
|
| 354 |
|
| 355 |
+
// Draw hand skeleton from latest render detection
|
| 356 |
+
if (lastRenderResults && lastRenderResults.landmarks && lastRenderResults.landmarks.length > 0) {
|
| 357 |
+
const hand = lastRenderResults.landmarks[0];
|
| 358 |
drawHand(hand, w, h);
|
| 359 |
|
|
|
|
| 360 |
let cx = 0, cy = 0;
|
| 361 |
for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
|
| 362 |
cx /= hand.length;
|
| 363 |
cy /= hand.length;
|
| 364 |
|
|
|
|
| 365 |
ctx.beginPath();
|
| 366 |
ctx.arc(cx * w, cy * h, 8, 0, Math.PI * 2);
|
| 367 |
ctx.fillStyle = 'rgba(255, 107, 53, 0.8)';
|
| 368 |
ctx.fill();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
}
|
| 370 |
}
|
| 371 |
|
| 372 |
// ═══════════════════════════════════════════════════════════════
|
| 373 |
+
// CONTROL LOOP — 1 Hz: one detection + one movement per tick
|
| 374 |
+
//
|
| 375 |
+
// Why 1 Hz? The camera is on the robot's head. When we send
|
| 376 |
+
// look_at_image, the head moves, which moves the camera, which
|
| 377 |
+
// changes where the hand appears in the frame. At high frequency
|
| 378 |
+
// this creates a feedback loop of conflicting commands. At 1 Hz
|
| 379 |
+
// with duration=0.8s, the head smoothly moves to the target and
|
| 380 |
+
// has 0.2s to settle before the next detection.
|
| 381 |
// ═══════════════════════════════════════════════════════════════
|
| 382 |
|
| 383 |
function startControl() {
|
|
|
|
| 391 |
|
| 392 |
function controlTick() {
|
| 393 |
if (robot.state !== 'streaming') { stopControl(); return; }
|
| 394 |
+
if (!trackingEnabled || !detector) return;
|
| 395 |
|
| 396 |
+
const w = video.videoWidth, h = video.videoHeight;
|
| 397 |
+
if (!w || !h || video.readyState < 2) return;
|
| 398 |
+
|
| 399 |
+
// Run a fresh detection for control (independent of render loop)
|
| 400 |
+
let results = null;
|
| 401 |
+
const now = performance.now();
|
| 402 |
+
try {
|
| 403 |
+
results = detector.detectForVideo(video, now);
|
| 404 |
+
} catch (_) {}
|
| 405 |
+
|
| 406 |
+
if (results && results.landmarks && results.landmarks.length > 0) {
|
| 407 |
+
noHandTicks = 0;
|
| 408 |
+
const hand = results.landmarks[0];
|
| 409 |
+
|
| 410 |
+
// Compute centroid in pixels
|
| 411 |
+
let cx = 0, cy = 0;
|
| 412 |
+
for (let i = 0; i < hand.length; i++) { cx += hand[i].x; cy += hand[i].y; }
|
| 413 |
+
cx /= hand.length;
|
| 414 |
+
cy /= hand.length;
|
| 415 |
+
const u = cx * w;
|
| 416 |
+
const v = cy * h;
|
| 417 |
+
|
| 418 |
+
lastTarget = { u, v };
|
| 419 |
+
robot.lookAtImage(u, v, MOVE_DURATION);
|
| 420 |
|
| 421 |
+
// HUD
|
| 422 |
+
$('headBadge').textContent = `px:${Math.round(u)},${Math.round(v)}`;
|
| 423 |
+
} else {
|
| 424 |
noHandTicks++;
|
|
|
|
| 425 |
if (noHandTicks > NO_HAND_RETURN_TICKS) {
|
| 426 |
+
lastTarget = null;
|
| 427 |
+
robot.lookAtImage(w / 2, h / 2, MOVE_DURATION);
|
| 428 |
+
$('headBadge').textContent = 'center';
|
|
|
|
| 429 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
}
|
| 431 |
|
|
|
|
| 432 |
updateTrackBadge();
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
+
// Latency (every ~2 ticks)
|
|
|
|
| 435 |
if (now - lastLatencyTime >= 2000) {
|
| 436 |
lastLatencyTime = now;
|
| 437 |
updateLatencyBadge();
|
|
|
|
| 477 |
}
|
| 478 |
|
| 479 |
function resetTrackingState() {
|
| 480 |
+
lastTarget = null;
|
| 481 |
+
lastRenderDetectTs = -1;
|
| 482 |
+
lastRenderResults = null;
|
| 483 |
noHandTicks = 0;
|
| 484 |
}
|
| 485 |
|
style.css
CHANGED
|
@@ -160,8 +160,7 @@ main {
|
|
| 160 |
|
| 161 |
.video-container canvas {
|
| 162 |
position: absolute;
|
| 163 |
-
pointer-events:
|
| 164 |
-
cursor: crosshair;
|
| 165 |
}
|
| 166 |
|
| 167 |
/* ── HUD badges ── */
|
|
|
|
| 160 |
|
| 161 |
.video-container canvas {
|
| 162 |
position: absolute;
|
| 163 |
+
pointer-events: none;
|
|
|
|
| 164 |
}
|
| 165 |
|
| 166 |
/* ── HUD badges ── */
|