<!--
  gaze_test / web / gaze_tracking.html
  Author: Olof Astrand
  Change: Added web inference option (commit 47bec77)
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gaze Tracking Interface</title>
<style>
body {
margin: 0;
padding: 0;
font-family: Arial, sans-serif;
background-color: #1a1a1a;
color: white;
overflow: hidden;
}
#container {
display: flex;
height: 100vh;
}
#video-container {
position: relative;
width: 320px;
background-color: #2a2a2a;
padding: 20px;
}
#video {
width: 100%;
height: 240px;
background-color: #000;
border: 2px solid #444;
border-radius: 8px;
}
#canvas {
display: none;
}
#gaze-screen {
flex: 1;
position: relative;
background-color: #000;
cursor: none;
}
#gaze-cursor {
position: absolute;
width: 40px;
height: 40px;
pointer-events: none;
transition: transform 0.1s ease-out;
transform: translate(-50%, -50%);
}
.crosshair {
position: absolute;
background-color: #00ff00;
}
.crosshair-h {
width: 40px;
height: 3px;
top: 50%;
left: 0;
transform: translateY(-50%);
}
.crosshair-v {
width: 3px;
height: 40px;
left: 50%;
top: 0;
transform: translateX(-50%);
}
.center-dot {
position: absolute;
width: 10px;
height: 10px;
background-color: #ff0000;
border: 2px solid #fff;
border-radius: 50%;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
}
#trail {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
pointer-events: none;
}
.controls {
margin-top: 20px;
}
button {
background-color: #4CAF50;
border: none;
color: white;
padding: 10px 20px;
margin: 5px;
cursor: pointer;
border-radius: 4px;
font-size: 14px;
transition: background-color 0.3s;
}
button:hover {
background-color: #45a049;
}
button:disabled {
background-color: #666;
cursor: not-allowed;
}
#status {
margin-top: 20px;
padding: 10px;
background-color: #333;
border-radius: 4px;
font-size: 14px;
}
.status-connected {
color: #4CAF50;
}
.status-disconnected {
color: #f44336;
}
.info {
margin-top: 20px;
font-size: 12px;
color: #888;
}
#fps {
position: absolute;
top: 10px;
left: 10px;
background-color: rgba(0, 0, 0, 0.7);
padding: 5px 10px;
border-radius: 4px;
font-size: 14px;
}
#coordinates {
position: absolute;
top: 40px;
left: 10px;
background-color: rgba(0, 0, 0, 0.7);
padding: 5px 10px;
border-radius: 4px;
font-size: 14px;
}
.face-box {
position: absolute;
border: 2px solid #00ff00;
pointer-events: none;
}
.eye-box {
position: absolute;
border: 2px solid #ffff00;
pointer-events: none;
}
#smoothing-slider {
width: 100%;
margin-top: 10px;
}
.slider-container {
margin-top: 20px;
}
.slider-label {
font-size: 12px;
color: #888;
margin-bottom: 5px;
}
</style>
</head>
<body>
<div id="container">
<div id="video-container">
<video id="video" autoplay></video>
<canvas id="canvas"></canvas>
<div class="controls">
<button id="startBtn">Start Tracking</button>
<button id="stopBtn" disabled>Stop Tracking</button>
<button id="calibrateBtn">Calibrate</button>
</div>
<div id="status" class="status-disconnected">
Status: Not connected
</div>
<div class="slider-container">
<div class="slider-label">Smoothing: <span id="smoothing-value">5</span></div>
<input type="range" id="smoothing-slider" min="1" max="20" value="5">
</div>
<div class="info">
<p>Face Detection: <span id="face-status">Not detected</span></p>
<p>Model Inference: <span id="inference-time">0</span> ms</p>
<p>Server: <span id="server-url">http://localhost:5000</span></p>
</div>
</div>
<div id="gaze-screen">
<canvas id="trail"></canvas>
<div id="gaze-cursor">
<div class="crosshair crosshair-h"></div>
<div class="crosshair crosshair-v"></div>
<div class="center-dot"></div>
</div>
<div id="fps">FPS: 0</div>
<div id="coordinates">X: 0, Y: 0</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/blazeface"></script>
<script>
/**
 * Browser-side gaze tracker.
 *
 * Captures webcam frames, runs BlazeFace locally to find the face,
 * POSTs the cropped face to a Python inference server, and renders the
 * predicted gaze point (smoothed by a moving average plus an optional
 * 1-D Kalman filter per axis) on the right-hand gaze screen.
 *
 * External dependencies (loaded via <script> tags): tfjs, blazeface.
 * The constructor assumes the DOM is fully parsed (it is created from a
 * DOMContentLoaded handler).
 */
class GazeTracker {
  constructor() {
    // Cache DOM references used throughout the tracker.
    this.video = document.getElementById('video');
    this.canvas = document.getElementById('canvas');
    this.ctx = this.canvas.getContext('2d');
    this.trailCanvas = document.getElementById('trail');
    this.trailCtx = this.trailCanvas.getContext('2d');
    this.gazeCursor = document.getElementById('gaze-cursor');
    this.startBtn = document.getElementById('startBtn');
    this.stopBtn = document.getElementById('stopBtn');
    this.calibrateBtn = document.getElementById('calibrateBtn');
    this.smoothingSlider = document.getElementById('smoothing-slider');

    this.isTracking = false;
    this.faceModel = null; // lazily loaded on first start()
    this.serverUrl = 'http://localhost:5000';

    // Gaze position and moving-average smoothing state.
    this.currentGaze = { x: window.innerWidth / 2, y: window.innerHeight / 2 };
    this.gazeHistory = [];
    this.smoothingWindow = 5;

    // Trail rendering state.
    this.trailPoints = [];
    this.maxTrailLength = 30;

    // FPS bookkeeping.
    this.lastTime = performance.now();
    this.frameCount = 0;
    this.fps = 0;

    this.setupEventListeners();
    this.resizeTrailCanvas();
    window.addEventListener('resize', () => this.resizeTrailCanvas());

    // FIX: the constructor only runs after DOMContentLoaded (see page
    // bootstrap), so #gaze-screen already exists and the filter can be
    // initialised synchronously instead of via a racy setTimeout(100ms).
    this.kalmanFilter = this.initKalmanFilter();
  }

  /**
   * Build a fresh pair of 1-D Kalman filters (x and y axes), seeded at
   * the centre of the gaze screen with high initial uncertainty.
   * @returns {{x: {estimate: number, uncertainty: number},
   *            y: {estimate: number, uncertainty: number},
   *            processNoise: number, measurementNoise: number}}
   */
  initKalmanFilter() {
    const gazeScreen = document.getElementById('gaze-screen');
    const initialX = gazeScreen ? gazeScreen.offsetWidth / 2 : window.innerWidth / 2;
    const initialY = gazeScreen ? gazeScreen.offsetHeight / 2 : window.innerHeight / 2;
    return {
      x: { estimate: initialX, uncertainty: 1000 },
      y: { estimate: initialY, uncertainty: 1000 },
      processNoise: 1,
      measurementNoise: 25
    };
  }

  /**
   * One predict/update step of the scalar Kalman filter for the given axis.
   * @param {'x'|'y'} axis - which axis's filter state to advance.
   * @param {number} measurement - raw (averaged) gaze coordinate in px.
   * @returns {number} the filtered estimate for that axis.
   */
  kalmanUpdate(axis, measurement) {
    const filter = this.kalmanFilter[axis];
    // FIX: the noise parameters live on the parent kalmanFilter object,
    // not on the per-axis state. The original read filter.processNoise /
    // filter.measurementNoise (both undefined), which made every update
    // degenerate to NaN followed by a reset to the raw measurement — so
    // the filter never actually filtered.
    const { processNoise, measurementNoise } = this.kalmanFilter;
    // Reject invalid measurements; keep the previous estimate.
    if (isNaN(measurement) || !isFinite(measurement)) {
      console.warn(`Invalid measurement for ${axis}: ${measurement}`);
      return filter.estimate;
    }
    // Predict: uncertainty grows by the process noise.
    filter.uncertainty += processNoise;
    // Update: blend the measurement in proportionally to the Kalman gain.
    const gain = filter.uncertainty / (filter.uncertainty + measurementNoise);
    filter.estimate = filter.estimate + gain * (measurement - filter.estimate);
    filter.uncertainty = (1 - gain) * filter.uncertainty;
    // Defensive reset in case the numerics ever blow up.
    if (isNaN(filter.estimate) || !isFinite(filter.estimate)) {
      console.warn(`Kalman filter produced NaN for ${axis}, resetting...`);
      filter.estimate = measurement;
      filter.uncertainty = 1000;
    }
    return filter.estimate;
  }

  /**
   * Size the trail canvas to match the gaze screen (called on load and
   * on every window resize).
   */
  resizeTrailCanvas() {
    const gazeScreen = document.getElementById('gaze-screen');
    this.trailCanvas.width = gazeScreen.offsetWidth;
    this.trailCanvas.height = gazeScreen.offsetHeight;
  }

  /**
   * Wire up buttons, debug keyboard shortcuts, and the smoothing slider.
   * Shortcuts: 't' moves the cursor to a random point (rendering test),
   * 'k' toggles the Kalman filter on/off.
   */
  setupEventListeners() {
    this.startBtn.addEventListener('click', () => this.start());
    this.stopBtn.addEventListener('click', () => this.stop());
    this.calibrateBtn.addEventListener('click', () => this.calibrate());
    document.addEventListener('keypress', (e) => {
      if (e.key === 't' || e.key === 'T') {
        console.log('Testing cursor movement...');
        // FIX: generate the test point inside the gaze screen — the
        // cursor is absolutely positioned within #gaze-screen, so using
        // window dimensions could place it off the visible pane.
        const gazeScreen = document.getElementById('gaze-screen');
        const testX = Math.random() * gazeScreen.offsetWidth;
        const testY = Math.random() * gazeScreen.offsetHeight;
        this.updateGazePosition({ x: testX, y: testY });
      } else if (e.key === 'k' || e.key === 'K') {
        if (this.kalmanFilter) {
          this.kalmanFilter = null;
          console.log('Kalman filter disabled');
          alert('Kalman filter disabled - using simple averaging only');
        } else {
          this.kalmanFilter = this.initKalmanFilter();
          console.log('Kalman filter enabled');
          alert('Kalman filter enabled');
        }
      }
    });
    this.smoothingSlider.addEventListener('input', (e) => {
      // FIX: explicit radix on parseInt. History is cleared so the new
      // window size takes effect immediately.
      this.smoothingWindow = parseInt(e.target.value, 10);
      document.getElementById('smoothing-value').textContent = this.smoothingWindow;
      this.gazeHistory = [];
    });
  }

  /**
   * Acquire the camera, load BlazeFace (once), verify the inference
   * server is reachable, then kick off the tracking loop.
   * Updates the status box on success or failure.
   */
  async start() {
    let stream = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        video: { width: 640, height: 480 }
      });
      this.video.srcObject = stream;
      // Wait for metadata so videoWidth/videoHeight are valid.
      await new Promise(resolve => {
        this.video.onloadedmetadata = resolve;
      });
      // Match the working canvas to the camera resolution.
      this.canvas.width = this.video.videoWidth;
      this.canvas.height = this.video.videoHeight;
      // Load the face detection model on first use only.
      if (!this.faceModel) {
        this.updateStatus('Loading face detection model...', false);
        this.faceModel = await blazeface.load();
      }
      await this.checkServerConnection();
      this.isTracking = true;
      this.startBtn.disabled = true;
      this.stopBtn.disabled = false;
      this.updateStatus('Tracking active', true);
      this.trackGaze();
    } catch (error) {
      console.error('Error starting tracking:', error);
      // FIX: release the camera if startup failed after acquiring it,
      // otherwise the camera stays live with no way to stop it from the UI.
      if (stream) {
        stream.getTracks().forEach(track => track.stop());
        this.video.srcObject = null;
      }
      this.updateStatus('Error: ' + error.message, false);
    }
  }

  /**
   * Stop the tracking loop and release the camera.
   */
  stop() {
    this.isTracking = false;
    if (this.video.srcObject) {
      this.video.srcObject.getTracks().forEach(track => track.stop());
      // FIX: detach the stopped stream so the element does not keep a
      // reference to a dead MediaStream.
      this.video.srcObject = null;
    }
    this.startBtn.disabled = false;
    this.stopBtn.disabled = true;
    this.updateStatus('Tracking stopped', false);
  }

  /**
   * Ping the inference server's /health endpoint.
   * @returns {Promise<boolean>} true when the server responds OK.
   * @throws {Error} with a user-readable message when unreachable.
   */
  async checkServerConnection() {
    try {
      const response = await fetch(`${this.serverUrl}/health`);
      if (!response.ok) throw new Error('Server not responding');
      return true;
    } catch (error) {
      throw new Error('Cannot connect to inference server. Make sure the Python server is running.');
    }
  }

  /**
   * Main tracking loop: grab a frame, detect the face with BlazeFace,
   * send the padded face crop to the server, and update the cursor.
   * Re-schedules itself via requestAnimationFrame while isTracking.
   */
  async trackGaze() {
    if (!this.isTracking) return;
    const startTime = performance.now();
    // Copy the current video frame into the working canvas.
    this.ctx.drawImage(this.video, 0, 0);
    // Detect faces (returnTensors=false -> plain JS coordinate arrays).
    const predictions = await this.faceModel.estimateFaces(
      this.canvas,
      false // Don't flip horizontally
    );
    if (predictions.length > 0) {
      const face = predictions[0];
      document.getElementById('face-status').textContent = 'Detected';
      // Face bounding box plus 20% padding, clamped to the canvas.
      const [x1, y1] = face.topLeft;
      const [x2, y2] = face.bottomRight;
      const width = x2 - x1;
      const height = y2 - y1;
      const padding = Math.max(width, height) * 0.2;
      // FIX: round to integer pixels — getImageData takes integral
      // coordinates, and implicit truncation could drop a pixel row/col.
      const faceX = Math.round(Math.max(0, x1 - padding));
      const faceY = Math.round(Math.max(0, y1 - padding));
      const faceWidth = Math.round(Math.min(this.canvas.width - faceX, width + 2 * padding));
      const faceHeight = Math.round(Math.min(this.canvas.height - faceY, height + 2 * padding));
      // Guard against degenerate boxes (getImageData throws on size 0).
      if (faceWidth > 0 && faceHeight > 0) {
        const faceImageData = this.ctx.getImageData(faceX, faceY, faceWidth, faceHeight);
        const gazePosition = await this.sendToServer(faceImageData, {
          x: faceX,
          y: faceY,
          width: faceWidth,
          height: faceHeight
        });
        if (gazePosition) {
          this.updateGazePosition(gazePosition);
        }
      }
    } else {
      document.getElementById('face-status').textContent = 'Not detected';
    }
    this.updatePerformanceMetrics(startTime);
    requestAnimationFrame(() => this.trackGaze());
  }

  /**
   * POST the face crop to the inference server.
   * @param {ImageData} imageData - cropped face pixels.
   * @param {{x: number, y: number, width: number, height: number}} faceRect
   *     crop rectangle in camera-frame coordinates.
   * @returns {Promise<{x: number, y: number}|null>} predicted gaze point,
   *     or null on any failure (the loop simply skips the frame).
   */
  async sendToServer(imageData, faceRect) {
    try {
      // Re-encode the crop as a base64 JPEG via a scratch canvas.
      const tempCanvas = document.createElement('canvas');
      tempCanvas.width = imageData.width;
      tempCanvas.height = imageData.height;
      const tempCtx = tempCanvas.getContext('2d');
      tempCtx.putImageData(imageData, 0, 0);
      const base64Image = tempCanvas.toDataURL('image/jpeg', 0.8).split(',')[1];
      // The server maps its prediction into this on-screen area.
      // (FIX: per-frame console.log calls removed from this hot path.)
      const gazeScreen = document.getElementById('gaze-screen');
      const screenWidth = gazeScreen.offsetWidth;
      const screenHeight = gazeScreen.offsetHeight;
      const response = await fetch(`${this.serverUrl}/predict`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          image: base64Image,
          face_rect: faceRect,
          screen_width: screenWidth,
          screen_height: screenHeight
        })
      });
      if (!response.ok) throw new Error('Server error');
      const data = await response.json();
      document.getElementById('inference-time').textContent =
        data.inference_time ? data.inference_time.toFixed(1) : '0';
      return data.gaze_position;
    } catch (error) {
      console.error('Error sending to server:', error);
      return null;
    }
  }

  /**
   * Smooth a raw gaze sample (moving average, then optional Kalman
   * filter), clamp it to the gaze screen, and move the cursor, trail,
   * and coordinate readout.
   * @param {{x: number, y: number}} position - raw gaze sample in px.
   */
  updateGazePosition(position) {
    if (!position || isNaN(position.x) || isNaN(position.y)) {
      console.error('Invalid position received:', position);
      return;
    }
    // Sliding window of recent samples for the moving average.
    this.gazeHistory.push(position);
    if (this.gazeHistory.length > this.smoothingWindow) {
      this.gazeHistory.shift();
    }
    let smoothedX, smoothedY;
    if (this.gazeHistory.length > 0) {
      const avgX = this.gazeHistory.reduce((sum, p) => sum + p.x, 0) / this.gazeHistory.length;
      const avgY = this.gazeHistory.reduce((sum, p) => sum + p.y, 0) / this.gazeHistory.length;
      // Kalman-refine the average when the filter is enabled ('k' key).
      if (this.kalmanFilter) {
        smoothedX = this.kalmanUpdate('x', avgX);
        smoothedY = this.kalmanUpdate('y', avgY);
        if (isNaN(smoothedX) || isNaN(smoothedY)) {
          console.warn('Kalman filter failed, using average');
          smoothedX = avgX;
          smoothedY = avgY;
        }
      } else {
        smoothedX = avgX;
        smoothedY = avgY;
      }
    } else {
      smoothedX = position.x;
      smoothedY = position.y;
    }
    // Clamp to the visible gaze area.
    // (FIX: per-frame console.log removed from this hot path.)
    const gazeScreen = document.getElementById('gaze-screen');
    smoothedX = Math.max(0, Math.min(smoothedX, gazeScreen.offsetWidth));
    smoothedY = Math.max(0, Math.min(smoothedY, gazeScreen.offsetHeight));
    this.currentGaze = { x: smoothedX, y: smoothedY };
    this.gazeCursor.style.left = `${smoothedX}px`;
    this.gazeCursor.style.top = `${smoothedY}px`;
    document.getElementById('coordinates').textContent =
      `X: ${Math.round(smoothedX)}, Y: ${Math.round(smoothedY)}`;
    this.updateTrail(smoothedX, smoothedY);
  }

  /**
   * Append a point to the trail and redraw it as fading line segments
   * (tail transparent, head opaque).
   * @param {number} x
   * @param {number} y
   */
  updateTrail(x, y) {
    this.trailPoints.push({ x, y, time: Date.now() });
    // Cap the trail length (oldest point dropped first).
    if (this.trailPoints.length > this.maxTrailLength) {
      this.trailPoints.shift();
    }
    this.trailCtx.clearRect(0, 0, this.trailCanvas.width, this.trailCanvas.height);
    // FIX: the original began an extra, never-stroked outer path before
    // the loop; each segment is its own path so it can carry its own alpha.
    for (let i = 1; i < this.trailPoints.length; i++) {
      const prevPoint = this.trailPoints[i - 1];
      const point = this.trailPoints[i];
      const alpha = i / this.trailPoints.length;
      this.trailCtx.strokeStyle = `rgba(0, 255, 0, ${alpha * 0.5})`;
      this.trailCtx.lineWidth = 2;
      this.trailCtx.beginPath();
      this.trailCtx.moveTo(prevPoint.x, prevPoint.y);
      this.trailCtx.lineTo(point.x, point.y);
      this.trailCtx.stroke();
    }
  }

  /**
   * Count frames and refresh the FPS readout once per second.
   * @param {number} startTime - performance.now() at frame start (kept
   *     for interface compatibility; FPS is derived from wall time).
   */
  updatePerformanceMetrics(startTime) {
    const endTime = performance.now();
    this.frameCount++;
    if (endTime - this.lastTime >= 1000) {
      this.fps = this.frameCount;
      this.frameCount = 0;
      this.lastTime = endTime;
      document.getElementById('fps').textContent = `FPS: ${this.fps}`;
    }
  }

  /**
   * Update the status box text and its connected/disconnected styling.
   * @param {string} message
   * @param {boolean} isConnected
   */
  updateStatus(message, isConnected) {
    const statusEl = document.getElementById('status');
    statusEl.textContent = `Status: ${message}`;
    statusEl.className = isConnected ? 'status-connected' : 'status-disconnected';
  }

  /**
   * Placeholder for the calibration routine (not yet implemented).
   */
  async calibrate() {
    alert('Calibration feature coming soon!');
  }
}
// Bootstrap: construct a single GazeTracker once the DOM is fully
// parsed, keeping it in a global for inspection from the dev console.
let tracker;
window.addEventListener('DOMContentLoaded', function () {
  tracker = new GazeTracker();
});
</script>
</body>
</html>