// public/screenshare.js — (scraped repository-page chrome removed; original note: "Update public/screenshare.js", commit 1a6c5c2)
/**
* Rox AI - Enterprise Screen Share Module with Vision Capabilities
* @fileoverview Production-grade desktop screen sharing with LLM vision processing
* @version 3.0.0
* @author Rox AI Technologies
* @license MIT
* @requires ES2020+
*/
'use strict';
/**
 * Configuration Constants - Centralized tuning parameters.
 * All *_MS values are in milliseconds. Frozen to prevent accidental mutation.
 * @readonly
 */
const SCREEN_SHARE_CONFIG = Object.freeze({
SILENCE_TIMEOUT_MS: 2500, // pause length that ends a voice prompt (see _resetSilenceTimer)
COOLDOWN_MS: 1000, // pause after TTS completes before listening resumes (see _handleStreamResponse)
ERROR_COOLDOWN_MS: 2000, // longer pause after a recognition/processing error
CANCEL_COOLDOWN_MS: 1200, // NOTE(review): not referenced in the visible code — presumably used by _cancelAndListen; verify
TTS_WORD_THRESHOLD: 8, // NOTE(review): consumed by TTS queueing code outside this view — confirm meaning
TTS_RATE: 1.1, // speech rate — presumably applied to SpeechSynthesisUtterance; verify in TTS code
TTS_PITCH: 1.0, // speech pitch — presumably applied to SpeechSynthesisUtterance; verify in TTS code
MIN_DESKTOP_WIDTH: 1024, // minimum window.innerWidth for ScreenShareManager.isDesktop()
DRAG_DEBOUNCE_MS: 16, // ~60fps; NOTE(review): not referenced in visible code (dragging uses requestAnimationFrame instead)
API_ENDPOINT: '/api/chat', // backend endpoint used by _processPrompt
SUPPORTED_SURFACES: ['monitor'], // 'window', 'browser' excluded for full desktop capture
});
/**
 * Structured logger for the screen-share module.
 * Every message is prefixed with "[ScreenShare]". debug/info are gated by
 * the window.ROX_DEBUG flag (defaults to enabled when a window exists);
 * warn/error always pass through. Disable in production via ROX_DEBUG=false.
 */
const RoxLogger = (() => {
  const tag = (msg) => `[ScreenShare] ${msg}`;
  const logger = {
    // Read dynamically on each call so ROX_DEBUG can be flipped at runtime
    _enabled: typeof window !== 'undefined' && (window.ROX_DEBUG ?? true),
    debug: (msg, ...args) => logger._enabled && console.debug(tag(msg), ...args),
    info: (msg, ...args) => logger._enabled && console.info(tag(msg), ...args),
    warn: (msg, ...args) => console.warn(tag(msg), ...args),
    error: (msg, ...args) => console.error(tag(msg), ...args),
  };
  return logger;
})();
/**
 * Screen Share State Machine states. Transitions between them are
 * validated by _setState(); see the allow-list there.
 * @readonly
 * @enum {string}
 */
const ScreenShareState = Object.freeze({
IDLE: 'idle', // no active session
LISTENING: 'listening', // microphone open, accumulating a voice prompt
PROCESSING: 'processing', // screenshot captured, request in flight
SPEAKING: 'speaking', // streaming response being rendered / spoken
COOLDOWN: 'cooldown', // short pause before listening resumes
ERROR: 'error' // recoverable error; cooldown leads back to listening
});
/**
 * Error Classification for Retry Logic.
 * NOTE(review): not referenced anywhere in the visible portion of this
 * file — either used further down or dead; verify before removing.
 * @readonly
 * @enum {string}
 */
const ErrorType = Object.freeze({
RETRYABLE: 'retryable', // Network, timeout
FATAL: 'fatal', // Permission denied, API error
USER: 'user', // Cancelled, invalid input
MEDIA: 'media' // Microphone, screen capture
});
/**
* Enterprise Screen Share Manager
* Handles secure screen capture, voice input, LLM processing, and TTS output
*
* @example
* const manager = new ScreenShareManager(roxAIInstance);
* await manager.start();
*/
class ScreenShareManager {
/**
 * Build a manager bound to the host Rox AI application.
 * Initializes all state fields, the speech recognizer, and the pooled
 * offscreen video/canvas elements used for screenshots.
 * @param {Object} roxAI - Main Rox AI instance with models array
 * @throws {TypeError} If roxAI instance is invalid
 */
constructor(roxAI) {
if (!roxAI || !Array.isArray(roxAI.models)) {
throw new TypeError('Valid RoxAI instance with models array required');
}
/** @type {Object} Host app handle (models list, optional showDialog, ...) */
this.roxAI = roxAI;
/** @type {MediaStream|null} Active getDisplayMedia stream */
this.screenStream = null;
/** @type {string|null} Model id chosen in showLLMSelector() */
this.selectedLLM = null;
/** @type {ScreenShareState} Current state-machine state */
this._state = ScreenShareState.IDLE;
/** @type {SpeechRecognition|null} Web Speech API recognizer, or null if unsupported */
this.recognition = null;
/** @type {HTMLElement|null} Floating control-panel root element */
this.floatingWindow = null;
/** @type {string} Accumulated final voice transcript awaiting processing */
this.currentPrompt = '';
/** @type {string} Last complete LLM response text */
this.currentResponse = '';
/** @type {number|null} Silence-detection timeout id */
this.silenceTimer = null;
/** @type {HTMLCanvasElement|null} Pooled offscreen canvas for captures */
this.canvas = null;
/** @type {CanvasRenderingContext2D|null} 2D context of the pooled canvas */
this.canvasCtx = null;
/** @type {HTMLVideoElement|null} */
this.videoElement = null; // Pooled for reuse
/** @type {AbortController|null} Cancels the in-flight API request */
this.abortController = null;
/** @type {number|null} Cooldown timeout id */
this.cooldownTimer = null;
/** @type {Array<string>} Pending TTS text chunks */
this._ttsQueue = [];
/** @type {boolean} True while speech synthesis is playing */
this._isSpeaking = false;
/** @type {boolean} True once the SSE response stream has fully arrived */
this._ttsStreamEnded = false;
/** @type {Function|null} Callback fired when TTS playback drains */
this._onTTSComplete = null;
/** @type {Array<{type: string, listener: Function, element: EventTarget}>} */
this._boundListeners = [];
/** @type {MutationObserver|null} Keeps the floating window on top */
this._alwaysOnTopObserver = null;
/** @type {number|null} requestAnimationFrame id used while dragging (initialized null) */
this._dragRafId = null;
/** @type {boolean} Set by dispose(); start() refuses to run afterwards */
this._isDisposed = false;
/** @type {number} Consecutive wrong-surface retries inside start() */
this._retryCount = 0;
this._initSpeechRecognition();
this._initPooledElements();
}
/**
* Initialize pooled DOM elements for performance
* @private
*/
_initPooledElements() {
// Pool video element to avoid GC pressure during screenshots
this.videoElement = document.createElement('video');
this.videoElement.setAttribute('playsinline', 'true');
this.videoElement.muted = true;
// Create canvas but don't add to DOM (offscreen)
this.canvas = document.createElement('canvas');
this.canvasCtx = this.canvas.getContext('2d', { alpha: false }); // Optimize for no transparency
}
/**
* Check if device is desktop using comprehensive heuristics
* @static
* @returns {boolean}
*/
static isDesktop() {
const hasCoarsePointer = window.matchMedia?.('(pointer: coarse)').matches ?? false;
const hasTouch = 'ontouchstart' in window || navigator.maxTouchPoints > 0;
const width = window.innerWidth;
return width >= SCREEN_SHARE_CONFIG.MIN_DESKTOP_WIDTH && !hasCoarsePointer && !hasTouch;
}
/**
* Initialize Web Speech API with error boundaries
* @private
*/
_initSpeechRecognition() {
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
if (!SpeechRecognition) {
RoxLogger.warn('Web Speech API not supported in this browser');
return;
}
try {
this.recognition = new SpeechRecognition();
this.recognition.continuous = true;
this.recognition.interimResults = true;
this.recognition.lang = 'en-US';
this.recognition.maxAlternatives = 1;
// Bind handlers once to preserve context and allow removal
this._handleRecognitionResult = this._handleRecognitionResult.bind(this);
this._handleRecognitionError = this._handleRecognitionError.bind(this);
this._handleRecognitionEnd = this._handleRecognitionEnd.bind(this);
this.recognition.addEventListener('result', this._handleRecognitionResult);
this.recognition.addEventListener('error', this._handleRecognitionError);
this.recognition.addEventListener('end', this._handleRecognitionEnd);
} catch (error) {
RoxLogger.error('Failed to initialize speech recognition:', error);
this.recognition = null;
}
}
/**
 * Show a modal dialog asking which model should analyze the screen.
 * Resolves with the chosen model id, or null when dismissed (overlay
 * click, close button, or Escape key).
 * NOTE(review): model.name / model.desc are interpolated into innerHTML
 * unescaped — acceptable for trusted app config, but confirm these values
 * can never carry user-controlled text (XSS risk otherwise).
 * @returns {Promise<string|null>} Selected model ID or null if cancelled
 */
async showLLMSelector() {
return new Promise((resolve) => {
const modal = document.createElement('div');
modal.className = 'screenshare-modal-overlay';
modal.setAttribute('role', 'dialog');
modal.setAttribute('aria-modal', 'true');
modal.setAttribute('aria-labelledby', 'screenshare-modal-title');
const modelOptions = this.roxAI.models.map(model => {
// Only 'rox-6-dyno' advertises built-in vision; others go through "Rox Vision"
const isNativeVision = model.id === 'rox-6-dyno';
return `
<button class="screenshare-llm-option" data-model="${model.id}" type="button">
<div class="screenshare-llm-info">
<span class="screenshare-llm-name">${model.name}</span>
<span class="screenshare-llm-desc">${model.desc}</span>
<span class="screenshare-llm-badge">${isNativeVision ? 'Native Vision' : 'Rox Vision'}</span>
</div>
<svg class="screenshare-llm-check" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<polyline points="20 6 9 17 4 12"/>
</svg>
</button>
`;
}).join('');
modal.innerHTML = `
<div class="screenshare-modal">
<div class="screenshare-modal-header">
<h3 id="screenshare-modal-title">Select AI Model for Screen Analysis</h3>
<button class="screenshare-modal-close" aria-label="Close dialog" type="button">
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M18 6L6 18M6 6l12 12"/>
</svg>
</button>
</div>
<div class="screenshare-modal-content">
<p class="screenshare-modal-desc">Choose which AI model will analyze your screen and voice commands:</p>
<div class="screenshare-llm-list" role="listbox">
${modelOptions}
</div>
</div>
</div>
`;
document.body.appendChild(modal);
modal.querySelector('.screenshare-llm-option')?.focus(); // Focus first option
// Single teardown path: detach listeners, remove DOM, settle the promise
const cleanup = (result = null) => {
modal.removeEventListener('click', handleClick);
document.removeEventListener('keydown', handleEscape);
modal.remove();
resolve(result);
};
// Event delegation: one click handler covers overlay, close button, and options
const handleClick = (e) => {
const closeBtn = e.target.closest('.screenshare-modal-close');
const optionBtn = e.target.closest('.screenshare-llm-option');
if (e.target === modal || closeBtn) {
cleanup();
} else if (optionBtn) {
cleanup(optionBtn.dataset.model);
}
};
const handleEscape = (e) => {
if (e.key === 'Escape') {
cleanup();
}
};
modal.addEventListener('click', handleClick);
document.addEventListener('keydown', handleEscape);
});
}
/**
* FIXED: Validate screen capture surface type - Strict validation
* Handles browsers that don't support displaySurface (Firefox, Safari) via track.label fallback.
* @private
* @param {MediaStream} stream
* @returns {{valid: boolean, surface: string|null, warning: string|null}}
*/
_validateCaptureSurface(stream) {
const track = stream.getVideoTracks()[0];
if (!track) return { valid: false, surface: null, warning: 'No video track found' };
const settings = track.getSettings();
const surface = settings.displaySurface;
const label = track.label || '';
RoxLogger.info('Capture validation - Surface:', surface, 'Label:', label, 'Settings:', settings);
// FIX: Handle browsers that don't support displaySurface (Firefox, Safari)
// Firefox may use "screen" (lowercase); Linux window managers may report labels differently.
// Fallback to valid: true for unknown surfaces is intentional.
if (!surface) {
// Fallback: Try to infer from track label
const isLikelyTab = label.toLowerCase().includes('tab') ||
label.toLowerCase().includes('this tab');
const isLikelyWindow = label.toLowerCase().includes('window') &&
!label.toLowerCase().includes('screen') &&
!label.toLowerCase().includes('entire');
if (isLikelyTab) {
return {
valid: false,
surface: 'browser',
warning: 'Browser tab detected. You must select "Entire Screen" to capture other applications and tabs.'
};
}
if (isLikelyWindow) {
return {
valid: false,
surface: 'window',
warning: 'Specific window selected. Please select "Entire Screen" to capture all applications.'
};
}
RoxLogger.warn('Cannot determine display surface type, proceeding with caution');
return { valid: true, surface: 'unknown', warning: null };
}
// Standard validation for browsers that support displaySurface
if (!SCREEN_SHARE_CONFIG.SUPPORTED_SURFACES.includes(surface)) {
let warning = '';
switch (surface) {
case 'browser':
warning = 'You selected "Browser Tab". Please select "Entire Screen" instead to capture other applications.';
break;
case 'window':
warning = 'You selected a specific "Window". Please select "Entire Screen" to capture everything.';
break;
default:
warning = `Invalid selection: "${surface}". Please choose "Entire Screen".`;
}
return { valid: false, surface, warning };
}
return { valid: true, surface, warning: null };
}
/**
* FIXED: Show surface error dialog - No "Continue Anyway" option
* @private
* @param {string} message
* @returns {Promise<boolean>} True if user wants to retry, false if cancelled
*/
_showSurfaceError(message) {
return new Promise((resolve) => {
if (this.roxAI.showDialog) {
this.roxAI.showDialog({
type: 'error',
title: 'Wrong Screen Selection',
message: `${message}\n\nYou must select "Entire Screen" (your whole desktop) for this feature to work properly.`,
showCancel: true,
confirmText: 'Try Again',
cancelText: 'Cancel',
onConfirm: () => resolve(true),
onCancel: () => resolve(false)
});
} else {
const result = confirm(
`${message}\n\nYou must select "Entire Screen".\n\nClick OK to try again.`
);
resolve(result);
}
});
}
/**
 * Start the screen-sharing workflow: desktop check, model selection,
 * getDisplayMedia capture (forced toward "entire screen"), strict surface
 * validation with bounded retries, hidden-video setup, UI creation, and
 * voice-listening kickoff.
 * @returns {Promise<void>}
 * @throws {Error} If the manager is disposed or retries are exhausted
 */
async start() {
if (this._isDisposed) {
throw new Error('ScreenShareManager has been disposed');
}
if (!ScreenShareManager.isDesktop()) {
this.roxAI.showDialog?.({
type: 'warning',
title: 'Desktop Required',
message: 'Screen Share requires a desktop computer with a mouse and keyboard.',
showCancel: false
});
return;
}
// Select LLM
// NOTE(review): the wrong-surface retry below re-enters start(), which
// re-opens this selector each time — confirm that UX is intended.
this.selectedLLM = await this.showLLMSelector();
if (!this.selectedLLM) {
RoxLogger.info('User cancelled LLM selection');
return;
}
try {
// CRITICAL FIX: Use exact constraints where supported to force monitor selection
// NOTE(review): some browsers reject non-string displaySurface values
// (hence the catch below that falls back to the plain 'monitor' hint);
// 'cursor' is a legacy constraint — verify against current spec.
const constraints = {
video: {
cursor: 'always',
displaySurface: { exact: 'monitor' },
logicalSurface: true,
width: { ideal: 1920 },
height: { ideal: 1080 }
},
audio: false,
selfBrowserSurface: 'exclude',
surfaceSwitching: 'include'
};
try {
this.screenStream = await navigator.mediaDevices.getDisplayMedia(constraints);
} catch (constraintError) {
RoxLogger.warn('Exact monitor constraint failed, trying ideal:', constraintError);
constraints.video.displaySurface = 'monitor';
this.screenStream = await navigator.mediaDevices.getDisplayMedia(constraints);
}
// CRITICAL FIX: Validate immediately and strictly
// (_validateCaptureSurface is synchronous; the await is harmless)
const validation = await this._validateCaptureSurface(this.screenStream);
if (!validation.valid) {
RoxLogger.error('Invalid surface selected:', validation.surface);
this._cleanupStream();
// Bound the retry recursion to 4 attempts total
if ((this._retryCount || 0) > 3) {
throw new Error('Too many failed attempts to select Entire Screen');
}
const shouldRetry = await this._showSurfaceError(validation.warning);
if (shouldRetry) {
this._retryCount = (this._retryCount || 0) + 1;
return await this.start();
}
return;
}
this._retryCount = 0; // Reset on successful validation
// Setup video element for screenshots - attach to DOM so browsers don't throttle
// (kept 1px, transparent, offscreen, and click-through)
this.videoElement.srcObject = this.screenStream;
this.videoElement.playsInline = true;
this.videoElement.muted = true;
this.videoElement.style.position = 'fixed';
this.videoElement.style.top = '-9999px';
this.videoElement.style.left = '-9999px';
this.videoElement.style.width = '1px';
this.videoElement.style.height = '1px';
this.videoElement.style.opacity = '0';
this.videoElement.style.pointerEvents = 'none';
document.body.appendChild(this.videoElement);
await this.videoElement.play();
// Race condition protection: verify track is still active after play
const videoTrack = this.screenStream.getVideoTracks()[0];
if (videoTrack && videoTrack.readyState === 'ended') {
throw new Error('Track ended immediately after selection');
}
// Create UI
this._createFloatingWindow();
this._setState(ScreenShareState.LISTENING);
this._startRecognition();
// Handle user clicking "Stop sharing" in browser chrome
const track = this.screenStream.getVideoTracks()[0];
if (track) {
track.addEventListener('ended', () => {
RoxLogger.info('Screen share stopped via browser UI');
this.stop();
}, { once: true });
// Monitor for surface changes (user switches from monitor to tab via browser UI)
// Listener reference is kept so _cleanupStream can detach it.
this._videoTrack = track;
this._surfaceChangeListener = () => {
const newSettings = track.getSettings();
RoxLogger.info('Track configuration changed:', newSettings.displaySurface);
if (newSettings.displaySurface && newSettings.displaySurface !== 'monitor') {
RoxLogger.error('User switched to non-monitor surface');
this._showSurfaceError('You switched away from Entire Screen. Please restart and select Entire Screen.');
this.stop();
}
};
track.addEventListener('configurationchange', this._surfaceChangeListener);
}
} catch (error) {
RoxLogger.error('Start failed:', error);
if (error.name === 'NotAllowedError') {
this.roxAI.showDialog?.({
type: 'error',
title: 'Permission Denied',
message: 'Screen sharing permission was denied. Please allow access to your entire screen.',
showCancel: false
});
} else if (error.name === 'OverconstrainedError') {
this.roxAI.showDialog?.({
type: 'error',
title: 'Constraint Not Supported',
message: 'Your browser does not support forcing "Entire Screen" selection. Please manually select "Entire Screen" when prompted.',
showCancel: false
});
} else {
this.roxAI.showDialog?.({
type: 'error',
title: 'Screen Share Failed',
message: `Could not start: ${error.message}`,
showCancel: false
});
}
// NOTE(review): _cleanup() is not defined in this portion of the file —
// presumably declared further down; verify it exists (otherwise this
// error path throws a TypeError).
this._cleanup();
}
}
/**
 * Stop the active session and release per-session resources.
 * Teardown order matters: abort network first, then timers, TTS,
 * recognition, media stream, UI, observer, listeners, and finally the
 * pending drag animation frame. No-op when already idle.
 * (Pooled video/canvas elements survive for reuse — see dispose().)
 */
stop() {
if (this._state === ScreenShareState.IDLE) return;
RoxLogger.info('Stopping screen share session');
this._setState(ScreenShareState.IDLE);
// Abort pending API calls
if (this.abortController) {
this.abortController.abort();
this.abortController = null;
}
// Clear timers
this._clearAllTimers();
// Cancel TTS
if (window.speechSynthesis) {
window.speechSynthesis.cancel();
}
this._ttsQueue = [];
this._isSpeaking = false;
this._ttsStreamEnded = false;
this._onTTSComplete = null;
// Stop recognition
if (this.recognition) {
try {
this.recognition.stop();
} catch (e) {
// Already stopped — best effort
}
}
// Cleanup stream (also detaches the hidden video element from the DOM)
this._cleanupStream();
// Remove UI
if (this.floatingWindow) {
this.floatingWindow.remove();
this.floatingWindow = null;
}
// Disconnect observer
if (this._alwaysOnTopObserver) {
this._alwaysOnTopObserver.disconnect();
this._alwaysOnTopObserver = null;
}
// Remove listeners
this._removeAllListeners();
// Cancel pending animation frames
if (this._dragRafId) {
cancelAnimationFrame(this._dragRafId);
this._dragRafId = null;
}
// Reset state
this.currentPrompt = '';
this.currentResponse = '';
this.selectedLLM = null;
RoxLogger.info('Cleanup complete');
}
/**
* Dispose manager permanently (cannot be reused after this)
*/
dispose() {
this.stop();
this._isDisposed = true;
// Clean up pooled elements
if (this.videoElement) {
this.videoElement.srcObject = null;
this.videoElement = null;
}
if (this.canvas) {
this.canvas.width = 0; // Free memory
this.canvas.height = 0;
this.canvas = null;
}
this.canvasCtx = null;
// Remove recognition listeners
if (this.recognition) {
this.recognition.removeEventListener('result', this._handleRecognitionResult);
this.recognition.removeEventListener('error', this._handleRecognitionError);
this.recognition.removeEventListener('end', this._handleRecognitionEnd);
this.recognition = null;
}
// Defensive cleanup of track/listener and retry state
this._videoTrack = null;
this._surfaceChangeListener = null;
this._retryCount = 0;
}
/**
* Cleanup media stream only. Removes hidden video element from DOM if present.
* @private
*/
_cleanupStream() {
if (this._surfaceChangeListener && this._videoTrack) {
this._videoTrack.removeEventListener('configurationchange', this._surfaceChangeListener);
this._surfaceChangeListener = null;
this._videoTrack = null;
}
if (this.screenStream) {
this.screenStream.getTracks().forEach(track => track.stop());
this.screenStream = null;
}
if (this.videoElement) {
this.videoElement.pause();
this.videoElement.srcObject = null;
if (this.videoElement.parentNode) {
this.videoElement.parentNode.removeChild(this.videoElement);
}
}
}
/**
* Clear all active timers
* @private
*/
_clearAllTimers() {
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
if (this.cooldownTimer) {
clearTimeout(this.cooldownTimer);
this.cooldownTimer = null;
}
}
/**
* State machine transition with validation
* @private
* @param {ScreenShareState} newState
*/
_setState(newState) {
const validTransitions = {
[ScreenShareState.IDLE]: [ScreenShareState.LISTENING],
[ScreenShareState.LISTENING]: [ScreenShareState.PROCESSING, ScreenShareState.IDLE, ScreenShareState.ERROR],
[ScreenShareState.PROCESSING]: [ScreenShareState.SPEAKING, ScreenShareState.LISTENING, ScreenShareState.IDLE, ScreenShareState.ERROR],
[ScreenShareState.SPEAKING]: [ScreenShareState.COOLDOWN, ScreenShareState.LISTENING, ScreenShareState.IDLE],
[ScreenShareState.COOLDOWN]: [ScreenShareState.LISTENING, ScreenShareState.IDLE],
[ScreenShareState.ERROR]: [ScreenShareState.LISTENING, ScreenShareState.IDLE]
};
if (this._state !== newState && validTransitions[this._state]?.includes(newState)) {
RoxLogger.debug(`State transition: ${this._state} -> ${newState}`);
this._state = newState;
this._updateUIForState();
} else if (this._state !== newState) {
RoxLogger.warn(`Invalid state transition attempted: ${this._state} -> ${newState}`);
}
}
/**
* Update UI based on state
* @private
*/
_updateUIForState() {
if (!this.floatingWindow) return;
const elements = {
listening: this.floatingWindow.querySelector('.screenshare-listening-indicator'),
speaking: this.floatingWindow.querySelector('.screenshare-speaking-indicator'),
processing: this.floatingWindow.querySelector('.screenshare-processing-indicator'),
cancelBtn: this.floatingWindow.querySelector('.screenshare-cancel-btn')
};
// Hide all first
Object.values(elements).forEach(el => {
if (el) el.style.display = 'none';
});
switch (this._state) {
case ScreenShareState.LISTENING:
if (elements.listening) elements.listening.style.display = 'inline-flex';
break;
case ScreenShareState.PROCESSING:
if (elements.processing) elements.processing.style.display = 'inline-flex';
if (elements.cancelBtn) elements.cancelBtn.style.display = 'inline-flex';
break;
case ScreenShareState.SPEAKING:
if (elements.speaking) elements.speaking.style.display = 'inline-flex';
if (elements.cancelBtn) elements.cancelBtn.style.display = 'inline-flex';
break;
}
}
/**
 * Build and attach the floating control window (voice-input panel plus
 * AI-analysis panel), then wire up dragging, button events, and the
 * always-on-top observer. Indicator visibility is driven later by
 * _updateUIForState().
 * @private
 */
_createFloatingWindow() {
this.floatingWindow = document.createElement('div');
this.floatingWindow.className = 'screenshare-floating-window';
this.floatingWindow.setAttribute('role', 'dialog');
this.floatingWindow.setAttribute('aria-label', 'Screen Share Control Panel');
// NOTE(review): _getModelName is defined outside this view — verify it
// returns display-safe text (interpolated into innerHTML below).
const modelName = this._getModelName(this.selectedLLM);
this.floatingWindow.innerHTML = `
<div class="screenshare-window-header">
<div class="screenshare-window-title">
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<rect x="2" y="3" width="20" height="14" rx="2"/>
<line x1="8" y1="21" x2="16" y2="21"/>
<line x1="12" y1="17" x2="12" y2="21"/>
</svg>
<span>Screen Share: ${modelName}</span>
</div>
<div class="screenshare-window-actions">
<button class="screenshare-window-minimize" title="Minimize" aria-label="Minimize" type="button">
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<line x1="5" y1="12" x2="19" y2="12"/>
</svg>
</button>
<button class="screenshare-window-close" title="Stop Screen Share" aria-label="Stop screen share" type="button">
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M18 6L6 18M6 6l12 12"/>
</svg>
</button>
</div>
</div>
<div class="screenshare-window-content">
<div class="screenshare-section">
<div class="screenshare-section-header">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="23"/>
<line x1="8" y1="23" x2="16" y2="23"/>
</svg>
<span>Voice Input</span>
<span class="screenshare-listening-indicator" style="display: none;">
<span class="screenshare-pulse"></span> Listening...
</span>
</div>
<div class="screenshare-prompt-display" id="screensharePrompt">
<span class="screenshare-placeholder">Start speaking to analyze screen...</span>
</div>
</div>
<div class="screenshare-section">
<div class="screenshare-section-header">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true">
<path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/>
</svg>
<span>AI Analysis</span>
<button class="screenshare-cancel-btn" id="screenshareCancelBtn" style="display: none;" title="Cancel" type="button">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" aria-hidden="true">
<circle cx="12" cy="12" r="10"/>
<line x1="15" y1="9" x2="9" y2="15"/>
<line x1="9" y1="9" x2="15" y2="15"/>
</svg>
</button>
<span class="screenshare-speaking-indicator" style="display: none;">
<span class="screenshare-wave"></span> Speaking...
</span>
<span class="screenshare-processing-indicator" style="display: none;">
<span class="screenshare-spinner"></span> Analyzing...
</span>
</div>
<div class="screenshare-response-display" id="screenshareResponse">
<span class="screenshare-placeholder">Analysis will appear here...</span>
</div>
</div>
</div>
`;
document.body.appendChild(this.floatingWindow);
this._initDraggable();
this._bindWindowEvents();
this._initAlwaysOnTop();
}
/**
* Bind window control events
* @private
*/
_bindWindowEvents() {
const closeBtn = this.floatingWindow.querySelector('.screenshare-window-close');
const minimizeBtn = this.floatingWindow.querySelector('.screenshare-window-minimize');
const cancelBtn = this.floatingWindow.querySelector('.screenshare-cancel-btn');
this._addListener(closeBtn, 'click', () => this.stop());
this._addListener(minimizeBtn, 'click', () => {
this.floatingWindow.classList.toggle('screenshare-minimized');
});
this._addListener(cancelBtn, 'click', () => this._cancelAndListen());
}
/**
* Track listeners for cleanup
* @private
*/
_addListener(element, type, listener) {
if (!element) return;
element.addEventListener(type, listener);
this._boundListeners.push({ element, type, listener });
}
/**
* Remove all tracked listeners
* @private
*/
_removeAllListeners() {
this._boundListeners.forEach(({ element, type, listener }) => {
element?.removeEventListener(type, listener);
});
this._boundListeners = [];
}
/**
* Keep window on top using MutationObserver
* @private
*/
_initAlwaysOnTop() {
if (this._alwaysOnTopObserver) return;
this._alwaysOnTopObserver = new MutationObserver((mutations) => {
if (this.floatingWindow && document.body.lastElementChild !== this.floatingWindow) {
document.body.appendChild(this.floatingWindow);
}
});
this._alwaysOnTopObserver.observe(document.body, {
childList: true,
subtree: false
});
}
/**
 * Make the floating window draggable by its header, throttling position
 * updates to one per animation frame. Drags starting on the action
 * buttons are ignored; the window is clamped to the viewport.
 * Listeners go through _addListener so stop() can detach them.
 * @private
 */
_initDraggable() {
const header = this.floatingWindow.querySelector('.screenshare-window-header');
if (!header) return;
// Closure-held drag state, reset on each mousedown
let isDragging = false;
let startX, startY, initialLeft, initialTop;
const onMouseDown = (e) => {
if (e.target.closest('.screenshare-window-actions')) return;
isDragging = true;
startX = e.clientX;
startY = e.clientY;
initialLeft = this.floatingWindow.offsetLeft;
initialTop = this.floatingWindow.offsetTop;
header.style.cursor = 'grabbing';
e.preventDefault();
};
const onMouseMove = (e) => {
if (!isDragging) return;
// Coalesce rapid mousemove events: only the latest event's coordinates
// are applied, at most once per frame
if (this._dragRafId) cancelAnimationFrame(this._dragRafId);
this._dragRafId = requestAnimationFrame(() => {
const dx = e.clientX - startX;
const dy = e.clientY - startY;
// Clamp to the viewport so the window cannot be dragged offscreen
const newX = Math.max(0, Math.min(
initialLeft + dx,
window.innerWidth - this.floatingWindow.offsetWidth
));
const newY = Math.max(0, Math.min(
initialTop + dy,
window.innerHeight - this.floatingWindow.offsetHeight
));
this.floatingWindow.style.left = `${newX}px`;
this.floatingWindow.style.top = `${newY}px`;
});
};
const onMouseUp = () => {
isDragging = false;
header.style.cursor = 'grab';
// Cancel any not-yet-painted frame (drops at most the final few px of motion)
if (this._dragRafId) {
cancelAnimationFrame(this._dragRafId);
this._dragRafId = null;
}
};
this._addListener(header, 'mousedown', onMouseDown);
// move/up live on document so the drag survives leaving the header
this._addListener(document, 'mousemove', onMouseMove);
this._addListener(document, 'mouseup', onMouseUp);
}
/**
* Start speech recognition safely
* @private
*/
_startRecognition() {
if (!this.recognition || this._state !== ScreenShareState.LISTENING) return;
try {
this.recognition.start();
RoxLogger.debug('Recognition started');
} catch (e) {
if (e.name !== 'InvalidStateError') {
RoxLogger.warn('Recognition start error:', e.message);
}
}
}
/**
* Stop speech recognition
* @private
*/
_stopRecognition() {
if (!this.recognition) return;
try {
this.recognition.stop();
RoxLogger.debug('Recognition stopped');
} catch (e) {
// Already stopped
}
}
/**
* Handle speech recognition results
* @private
*/
_handleRecognitionResult(event) {
if (this._state !== ScreenShareState.LISTENING) return;
let interimTranscript = '';
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; i++) {
const transcript = event.results[i][0].transcript;
if (event.results[i].isFinal) {
finalTranscript += transcript + ' ';
} else {
interimTranscript += transcript;
}
}
if (finalTranscript) {
this.currentPrompt += finalTranscript;
this._updatePromptDisplay();
this._resetSilenceTimer();
}
if (interimTranscript) {
this._updatePromptDisplay(interimTranscript);
}
}
/**
* Handle recognition errors with retry logic
* @private
*/
_handleRecognitionError(event) {
RoxLogger.error('Recognition error:', event.error);
if (this._state === ScreenShareState.LISTENING) {
if (['no-speech', 'audio-capture', 'network'].includes(event.error)) {
// Retryable errors
setTimeout(() => this._startRecognition(), 300);
} else if (event.error !== 'aborted') {
// Fatal error
this._setState(ScreenShareState.ERROR);
this._enterCooldown(SCREEN_SHARE_CONFIG.ERROR_COOLDOWN_MS);
}
}
}
/**
* Handle unexpected recognition end
* @private
*/
_handleRecognitionEnd() {
if (this._state === ScreenShareState.LISTENING) {
RoxLogger.debug('Recognition ended unexpectedly, restarting...');
setTimeout(() => this._startRecognition(), 150);
}
}
/**
* Update prompt display
* @private
*/
_updatePromptDisplay(interim = '') {
const promptEl = document.getElementById('screensharePrompt');
if (!promptEl) return;
const fullText = this.currentPrompt + interim;
if (fullText.trim()) {
promptEl.textContent = fullText;
promptEl.classList.remove('screenshare-placeholder');
} else {
promptEl.innerHTML = '<span class="screenshare-placeholder">Start speaking to analyze screen...</span>';
}
}
/**
* Reset silence detection timer
* @private
*/
_resetSilenceTimer() {
if (this.silenceTimer) clearTimeout(this.silenceTimer);
this.silenceTimer = setTimeout(() => {
if (this.currentPrompt.trim() && this._state === ScreenShareState.LISTENING) {
this._processPrompt();
}
}, SCREEN_SHARE_CONFIG.SILENCE_TIMEOUT_MS);
}
/**
 * Capture one frame of the shared screen as a PNG blob.
 * Reuses the pooled video element and canvas; the canvas is only resized
 * when the source dimensions change to avoid reallocations.
 * @private
 * @returns {Promise<Blob>} PNG image of the current frame.
 * @throws {Error} When the stream is gone, the video never loads, or
 *   blob conversion fails.
 */
async _captureScreenshot() {
if (!this.screenStream || !this.videoElement) {
throw new Error('Screen stream not available');
}
try {
// Ensure video is playing and has decoded at least one frame
// (readyState < 2 means no current frame data yet)
if (this.videoElement.paused || this.videoElement.readyState < 2) {
await new Promise((resolve, reject) => {
const timeout = setTimeout(() => reject(new Error('Video load timeout')), 5000);
// NOTE(review): assigned via onloadeddata (not addEventListener) and
// never cleared — a stale handler survives after the timeout path; it
// is overwritten on the next capture, so likely benign, but verify.
this.videoElement.onloadeddata = () => {
clearTimeout(timeout);
resolve();
};
this.videoElement.play().catch(reject);
});
}
const width = this.videoElement.videoWidth;
const height = this.videoElement.videoHeight;
// Resize canvas only if needed (avoid unnecessary allocations)
if (this.canvas.width !== width || this.canvas.height !== height) {
this.canvas.width = width;
this.canvas.height = height;
}
// Clear and draw the current video frame
this.canvasCtx.clearRect(0, 0, width, height);
this.canvasCtx.drawImage(this.videoElement, 0, 0, width, height);
// Convert to blob
// NOTE(review): the 0.95 quality argument applies only to lossy formats
// (jpeg/webp) and is presumably ignored for image/png — consider a lossy
// format if upload size matters; verify against canvas.toBlob docs.
return new Promise((resolve, reject) => {
this.canvas.toBlob((blob) => {
if (blob) {
RoxLogger.debug(`Screenshot captured: ${blob.size} bytes`);
resolve(blob);
} else {
reject(new Error('Canvas to Blob conversion failed'));
}
}, 'image/png', 0.95);
});
} catch (error) {
RoxLogger.error('Screenshot capture failed:', error);
throw error;
}
}
/**
 * Take the accumulated voice prompt, capture a screenshot, and POST both
 * to the chat API as multipart form data, then hand the streaming reply
 * to _handleStreamResponse(). Only runs from the LISTENING state with a
 * non-empty prompt; clears the prompt before the request goes out.
 * @private
 */
async _processPrompt() {
if (this._state !== ScreenShareState.LISTENING || !this.currentPrompt.trim()) return;
this._setState(ScreenShareState.PROCESSING);
this._stopRecognition();
this._clearAllTimers();
// Snapshot and clear the prompt so new speech starts fresh
const prompt = this.currentPrompt.trim();
this.currentPrompt = '';
this._updatePromptDisplay();
try {
// Capture screenshot
const screenshot = await this._captureScreenshot();
// Prepare multipart form data
const formData = new FormData();
formData.append('message', prompt);
formData.append('model', this.selectedLLM);
// NOTE(review): Date.now()-based id means every prompt starts a brand-new
// conversation (no context carry-over between prompts) — confirm intended.
formData.append('conversationId', `screenshare-${Date.now()}`);
formData.append('files', screenshot, 'screenshot.png');
// API request with abort support (abortController is cleared in finally)
this.abortController = new AbortController();
const response = await fetch(SCREEN_SHARE_CONFIG.API_ENDPOINT, {
method: 'POST',
body: formData,
signal: this.abortController.signal,
headers: {
'Accept': 'text/event-stream, application/json'
}
});
if (!response.ok) {
throw new Error(`API Error ${response.status}: ${response.statusText}`);
}
await this._handleStreamResponse(response);
} catch (error) {
// User-initiated aborts are silent; everything else shows and cools down
if (error.name !== 'AbortError') {
RoxLogger.error('Processing error:', error);
// NOTE(review): the message says "Retrying..." but no automatic retry is
// visible here — presumably _enterCooldown (defined later) just resumes
// listening; verify the wording matches actual behavior.
this._updateResponseDisplay(`Error: ${error.message}. Retrying...`, false);
this._enterCooldown(SCREEN_SHARE_CONFIG.ERROR_COOLDOWN_MS);
}
} finally {
this.abortController = null;
}
}
/**
* Handle streaming SSE response
* @private
*/
async _handleStreamResponse(response) {
const reader = response.body.getReader();
const decoder = new TextDecoder();
let fullResponse = '';
const responseEl = document.getElementById('screenshareResponse');
if (responseEl) {
responseEl.innerHTML = '';
responseEl.classList.remove('screenshare-placeholder');
}
// Reset TTS state
this._ttsQueue = [];
this._isSpeaking = false;
this._ttsStreamEnded = false;
this._setState(ScreenShareState.SPEAKING);
// Completion handler
this._onTTSComplete = () => {
RoxLogger.debug('TTS complete, entering cooldown');
this._enterCooldown(SCREEN_SHARE_CONFIG.COOLDOWN_MS);
};
try {
while (true) {
const { done, value } = await reader.read();
if (done) {
this._ttsStreamEnded = true;
this._processTTSQueue(); // Flush remaining
break;
}
const chunk = decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
try {
const data = JSON.parse(line.slice(6));
if (data.chunk) {
fullResponse += data.chunk;
this._updateResponseDisplay(fullResponse, true);
this._queueForSpeech(data.chunk);
}
} catch (e) {
// Ignore malformed JSON lines
}
}
}
}
this._updateResponseDisplay(fullResponse, false);
this.currentResponse = fullResponse;
} catch (error) {
if (error.name !== 'AbortError') throw error;
}
}
/**
* Queue text for TTS with intelligent buffering
* @private
*/
_queueForSpeech(text) {
this._ttsQueue.push(text);
const currentBuffer = this._ttsQueue.join('');
const sentenceEnders = /[.!?]\s*$/;
const wordCount = currentBuffer.trim().split(/\s+/).length;
// Start speaking if we have a complete sentence or enough words
if ((sentenceEnders.test(currentBuffer) || wordCount >= SCREEN_SHARE_CONFIG.TTS_WORD_THRESHOLD) && !this._isSpeaking) {
this._processTTSQueue();
}
}
/**
* Process TTS queue with voice selection
* @private
*/
_processTTSQueue() {
if (this._isSpeaking || this._ttsQueue.length === 0) {
if (this._ttsQueue.length === 0 && this._ttsStreamEnded && this._onTTSComplete) {
const cb = this._onTTSComplete;
this._onTTSComplete = null;
cb();
}
return;
}
// Concatenate queue for smooth speech
const textToSpeak = this._ttsQueue.join('');
this._ttsQueue = [];
// Clean markdown and special chars
const cleanText = textToSpeak
.replace(/[*`#\[\]()]/g, '')
.replace(/\n+/g, ' ')
.replace(/\s+/g, ' ')
.trim();
if (!cleanText) {
this._processTTSQueue(); // Skip empty
return;
}
this._isSpeaking = true;
const utterance = new SpeechSynthesisUtterance(cleanText);
utterance.rate = SCREEN_SHARE_CONFIG.TTS_RATE;
utterance.pitch = SCREEN_SHARE_CONFIG.TTS_PITCH;
utterance.volume = 1.0;
// Select best voice (prefer Google or Microsoft voices for quality)
const voices = window.speechSynthesis.getVoices();
const preferredVoice = voices.find(v =>
v.name.includes('Google') || v.name.includes('Microsoft') || v.lang === 'en-US'
);
if (preferredVoice) utterance.voice = preferredVoice;
utterance.onend = () => {
this._isSpeaking = false;
this._processTTSQueue(); // Continue with next
};
utterance.onerror = (event) => {
RoxLogger.error('TTS error:', event);
this._isSpeaking = false;
this._processTTSQueue();
};
window.speechSynthesis.speak(utterance);
}
/**
* Enter cooldown before resuming listening
* @private
*/
_enterCooldown(durationMs = SCREEN_SHARE_CONFIG.COOLDOWN_MS) {
if (this._state === ScreenShareState.IDLE) return;
this._setState(ScreenShareState.COOLDOWN);
this.cooldownTimer = setTimeout(() => {
if (this._state === ScreenShareState.COOLDOWN) {
this._setState(ScreenShareState.LISTENING);
this._startRecognition();
}
}, durationMs);
}
/**
* Update response display with markdown
* @private
*/
_updateResponseDisplay(text, isStreaming = true) {
const responseEl = document.getElementById('screenshareResponse');
if (!responseEl) return;
let html = this._parseMarkdown(text);
if (isStreaming) {
html += '<span class="streaming-cursor" aria-hidden="true"></span>';
}
responseEl.innerHTML = html;
responseEl.scrollTop = responseEl.scrollHeight;
}
/**
* Safe markdown parser (XSS protected)
* @private
*/
_parseMarkdown(text) {
if (!text) return '';
// Escape HTML first
let html = text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;');
// Convert markdown (on escaped text)
const rules = [
[/^### (.+)$/gm, '<h3>$1</h3>'],
[/^## (.+)$/gm, '<h2>$1</h2>'],
[/^# (.+)$/gm, '<h1>$1</h1>'],
[/\*\*(.+?)\*\*/g, '<strong>$1</strong>'],
[/\*(.+?)\*/g, '<em>$1</em>'],
[/```([\s\S]*?)```/g, '<pre><code>$1</code></pre>'],
[/([^`])`([^`]+?)`([^`])/g, '$1<code>$2</code>$3'],
[/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" target="_blank" rel="noopener noreferrer">$1</a>']
];
rules.forEach(([pattern, replacement]) => {
html = html.replace(pattern, replacement);
});
// Paragraphs
return html.split(/\n\n+/).map(p => p.trim() ? `<p>${p}</p>` : '').join('');
}
/**
* Cancel current operation and return to listening
* @private
*/
_cancelAndListen() {
if (this.abortController) {
this.abortController.abort();
this.abortController = null;
}
window.speechSynthesis?.cancel();
this._ttsQueue = [];
this._isSpeaking = false;
this._ttsStreamEnded = false;
this._onTTSComplete = null;
this._clearAllTimers();
this._updateResponseDisplay('Cancelled. Listening...', false);
this._enterCooldown(SCREEN_SHARE_CONFIG.CANCEL_COOLDOWN_MS);
}
/**
* Get model display name
* @private
*/
_getModelName(modelId) {
const model = this.roxAI?.models?.find(m => m.id === modelId);
return model?.name || modelId;
}
}
// Expose the public API on window for non-module (script tag) consumers.
if (typeof window !== 'undefined') {
  Object.assign(window, { ScreenShareManager, ScreenShareState, RoxLogger });
}
// Preload TTS voices: Chrome populates getVoices() asynchronously, so an early
// call plus a voiceschanged listener warms the cache before first speech.
if (typeof window !== 'undefined' && window.speechSynthesis) {
  window.speechSynthesis.getVoices();
  // Bug fix: use addEventListener instead of assigning onvoiceschanged, so
  // this warm-up cannot clobber a handler registered elsewhere in the app.
  window.speechSynthesis.addEventListener('voiceschanged', () => {
    window.speechSynthesis.getVoices();
  });
}