Spaces:
Paused
Paused
/**
 * Screen Capture Utility
 * Captures the screen using various methods depending on environment.
 * Designed to work without root access on Hugging Face.
 */
/**
 * One captured frame. `pixels` holds tightly packed RGBA8 bytes
 * (4 bytes per pixel, row-major). After the first frame, X11 capture may
 * instead return a diff frame where alpha 255 marks a changed pixel and
 * alpha 0 an unchanged one, or a zero-length array when nothing changed
 * (see X11ScreenCapture.calculateDifference).
 */
export interface CaptureResult {
  pixels: Uint8Array;  // RGBA bytes; length = width * height * 4, or 0 for an empty diff
  width: number;       // frame width in pixels
  height: number;      // frame height in pixels
  timestamp: number;   // capture time, Date.now() milliseconds
}
/**
 * Common contract for all capture backends (X11, headless Chrome, simulated).
 */
export interface ScreenCapture {
  /** Initializes the backend; must be called before capture(). */
  start(): Promise<void>;
  /** Returns the next frame (full frame or diff — see CaptureResult). */
  capture(): Promise<CaptureResult>;
  /** Releases backend resources; capture() is invalid afterwards. */
  stop(): void;
  /** Current frame dimensions in pixels. */
  getResolution(): { width: number; height: number };
}
| /** | |
| * X11 Screen Capture using x11rb | |
| * Requires Xvfb to be running | |
| */ | |
| export class X11ScreenCapture implements ScreenCapture { | |
| private connection: any = null; | |
| private screen: any = null; | |
| private width: number = 1280; | |
| private height: number = 720; | |
| private lastPixels: Uint8Array | null = null; | |
| private isRunning: boolean = false; | |
| async start(): Promise<void> { | |
| try { | |
| // Dynamic import to avoid issues if x11rb is not available | |
| const x11rb = await import('x11rb'); | |
| // Connect to X server (Xvfb) | |
| this.connection = await x11rb.connectToLocal(); | |
| const setup = await this.connection.setup(); | |
| this.screen = setup.roots[setup.default_screen]; | |
| this.width = this.screen.width_in_pixels || 1280; | |
| this.height = this.screen.height_in_pixels || 720; | |
| this.isRunning = true; | |
| console.log(`X11 capture started: ${this.width}x${this.height}`); | |
| } catch (error) { | |
| console.warn('X11 capture failed, falling back to simulated capture:', error); | |
| this.isRunning = true; | |
| } | |
| } | |
| async capture(): Promise<CaptureResult> { | |
| if (!this.isRunning) { | |
| throw new Error('Screen capture not started'); | |
| } | |
| try { | |
| if (this.connection) { | |
| return await this.captureX11(); | |
| } else { | |
| return await this.captureSimulated(); | |
| } | |
| } catch (error) { | |
| console.error('Capture error:', error); | |
| return await this.captureSimulated(); | |
| } | |
| } | |
| private async captureX11(): Promise<CaptureResult> { | |
| // Capture framebuffer | |
| const framebuffer = await this.connection.render_util.get_image( | |
| this.screen.root, | |
| 0, | |
| 0, | |
| this.width, | |
| this.height, | |
| 0x21 // ZPixmap format | |
| ); | |
| // Convert to RGBA | |
| const pixels = this.convertToRGBA(framebuffer.data, this.width, this.height); | |
| // Calculate differences from last frame for efficiency | |
| if (this.lastPixels) { | |
| const diff = this.calculateDifference(this.lastPixels, pixels); | |
| this.lastPixels = pixels; | |
| return { | |
| pixels: diff, | |
| width: this.width, | |
| height: this.height, | |
| timestamp: Date.now(), | |
| }; | |
| } | |
| this.lastPixels = pixels; | |
| return { | |
| pixels, | |
| width: this.width, | |
| height: this.height, | |
| timestamp: Date.now(), | |
| }; | |
| } | |
| private convertToRGBA(data: Uint8Array, width: number, height: number): Uint8Array { | |
| const pixels = new Uint8Array(width * height * 4); | |
| const bytesPerPixel = data.length / (width * height); | |
| for (let i = 0; i < width * height; i++) { | |
| const srcIdx = i * bytesPerPixel; | |
| const dstIdx = i * 4; | |
| if (bytesPerPixel === 4) { | |
| // Already RGBA | |
| pixels[dstIdx] = data[srcIdx]; | |
| pixels[dstIdx + 1] = data[srcIdx + 1]; | |
| pixels[dstIdx + 2] = data[srcIdx + 2]; | |
| pixels[dstIdx + 3] = 255; | |
| } else if (bytesPerPixel === 3) { | |
| // RGB | |
| pixels[dstIdx] = data[srcIdx]; | |
| pixels[dstIdx + 1] = data[srcIdx + 1]; | |
| pixels[dstIdx + 2] = data[srcIdx + 2]; | |
| pixels[dstIdx + 3] = 255; | |
| } else if (bytesPerPixel === 2) { | |
| // RGB565 or similar | |
| const pixel = (data[srcIdx + 1] << 8) | data[srcIdx]; | |
| pixels[dstIdx] = ((pixel >> 11) & 0x1f) << 3; | |
| pixels[dstIdx + 1] = ((pixel >> 5) & 0x3f) << 2; | |
| pixels[dstIdx + 2] = (pixel & 0x1f) << 3; | |
| pixels[dstIdx + 3] = 255; | |
| } | |
| } | |
| return pixels; | |
| } | |
| private calculateDifference(oldPixels: Uint8Array, newPixels: Uint8Array): Uint8Array { | |
| const diff = new Uint8Array(newPixels.length); | |
| const pixelCount = newPixels.length / 4; | |
| let hasChanges = false; | |
| for (let i = 0; i < pixelCount; i++) { | |
| const idx = i * 4; | |
| if (oldPixels[idx] !== newPixels[idx] || | |
| oldPixels[idx + 1] !== newPixels[idx + 1] || | |
| oldPixels[idx + 2] !== newPixels[idx + 2]) { | |
| // Pixel changed | |
| diff[idx] = newPixels[idx]; | |
| diff[idx + 1] = newPixels[idx + 1]; | |
| diff[idx + 2] = newPixels[idx + 2]; | |
| diff[idx + 3] = 255; // Mark as changed | |
| hasChanges = true; | |
| } else { | |
| diff[idx + 3] = 0; // Mark as unchanged | |
| } | |
| } | |
| // If no changes, return empty diff | |
| if (!hasChanges) { | |
| return new Uint8Array(0); | |
| } | |
| return diff; | |
| } | |
| private async captureSimulated(): Promise<CaptureResult> { | |
| // Generate simulated desktop content for demo | |
| const canvas = new OffscreenCanvas(this.width, this.height); | |
| const ctx = canvas.getContext('2d')!; | |
| // Draw simulated desktop | |
| ctx.fillStyle = '#1a1a2e'; | |
| ctx.fillRect(0, 0, this.width, this.height); | |
| // Draw taskbar | |
| ctx.fillStyle = '#16213e'; | |
| ctx.fillRect(0, this.height - 40, this.width, 40); | |
| // Draw some UI elements | |
| ctx.fillStyle = '#e94560'; | |
| ctx.font = '24px Arial'; | |
| ctx.fillText('HF Custom VNC', 20, this.height - 15); | |
| // Draw time | |
| const now = new Date(); | |
| ctx.fillStyle = '#ffffff'; | |
| ctx.font = '16px monospace'; | |
| ctx.fillText(now.toLocaleTimeString(), this.width - 100, this.height - 15); | |
| // Draw some "windows" | |
| ctx.fillStyle = '#0f3460'; | |
| ctx.fillRect(100, 100, 400, 300); | |
| ctx.fillStyle = '#e94560'; | |
| ctx.fillRect(100, 100, 400, 30); | |
| ctx.fillStyle = '#ffffff'; | |
| ctx.fillText('Terminal', 110, 122); | |
| // Draw content in "terminal" | |
| ctx.fillStyle = '#00ff00'; | |
| ctx.font = '14px monospace'; | |
| ctx.fillText('$ uname -a', 110, 160); | |
| ctx.fillText('Linux hf-space 5.4.0 #1 SMP', 110, 180); | |
| ctx.fillText('$', 110, 200); | |
| // Simulate cursor | |
| ctx.fillStyle = '#ffffff'; | |
| ctx.fillRect(130, 195, 10, 20); | |
| const pixels = ctx.getImageData(0, 0, this.width, this.height).data; | |
| const rgbaPixels = new Uint8Array(this.width * this.height * 4); | |
| for (let i = 0; i < pixels.length; i += 4) { | |
| rgbaPixels[i] = pixels[i]; | |
| rgbaPixels[i + 1] = pixels[i + 1]; | |
| rgbaPixels[i + 2] = pixels[i + 2]; | |
| rgbaPixels[i + 3] = 255; | |
| } | |
| return { | |
| pixels: rgbaPixels, | |
| width: this.width, | |
| height: this.height, | |
| timestamp: Date.now(), | |
| }; | |
| } | |
| stop(): void { | |
| this.isRunning = false; | |
| if (this.connection) { | |
| this.connection.close(); | |
| this.connection = null; | |
| } | |
| } | |
| getResolution(): { width: number; height: number } { | |
| return { width: this.width, height: this.height }; | |
| } | |
| } | |
| /** | |
| * Headless Chrome/Puppeteer Screen Capture | |
| * Uses Chrome DevTools Protocol to capture screen | |
| */ | |
| export class ChromeScreenCapture implements ScreenCapture { | |
| private browser: any = null; | |
| private page: any = null; | |
| private width: number = 1280; | |
| private height: number = 720; | |
| private lastScreenshot: string | null = null; | |
| async start(): Promise<void> { | |
| try { | |
| // Puppeteer will be imported dynamically | |
| const puppeteer = await import('puppeteer'); | |
| this.browser = await puppeteer.launch({ | |
| headless: true, | |
| args: [ | |
| '--no-sandbox', | |
| '--disable-setuid-sandbox', | |
| '--disable-dev-shm-usage', | |
| '--disable-gpu', | |
| '--window-size=1280,720', | |
| ], | |
| }); | |
| this.page = await this.browser.newPage(); | |
| await this.page.setViewport({ width: this.width, height: this.height }); | |
| // Create a simple HTML page for capture | |
| await this.page.setContent(` | |
| <!DOCTYPE html> | |
| <html> | |
| <head> | |
| <style> | |
| body { margin: 0; overflow: hidden; background: #1a1a2e; } | |
| #terminal { | |
| position: absolute; | |
| top: 100px; | |
| left: 100px; | |
| width: 400px; | |
| height: 300px; | |
| background: #0f3460; | |
| border: 2px solid #e94560; | |
| } | |
| #taskbar { | |
| position: absolute; | |
| bottom: 0; | |
| left: 0; | |
| width: 100%; | |
| height: 40px; | |
| background: #16213e; | |
| } | |
| #time { color: white; position: absolute; right: 20px; top: 10px; } | |
| </style> | |
| </head> | |
| <body> | |
| <div id="terminal"> | |
| <div style="background: #e94560; padding: 5px; color: white;">Terminal</div> | |
| <pre style="color: #00ff00; padding: 10px;">$ Connected to HF Space | |
| $ Running custom VNC server | |
| $ Ready for connections...</pre> | |
| </div> | |
| <div id="taskbar"><span id="time"></span></div> | |
| <script> | |
| setInterval(() => { | |
| document.getElementById('time').textContent = new Date().toLocaleTimeString(); | |
| }, 1000); | |
| document.getElementById('time').textContent = new Date().toLocaleTimeString(); | |
| </script> | |
| </body> | |
| </html> | |
| `); | |
| console.log('Chrome capture started'); | |
| } catch (error) { | |
| console.warn('Chrome capture failed, using X11:', error); | |
| throw error; | |
| } | |
| } | |
| async capture(): Promise<CaptureResult> { | |
| try { | |
| if (this.page) { | |
| const screenshot = await this.page.screenshot({ | |
| type: 'png', | |
| omitBackground: false, | |
| }); | |
| // Convert PNG to RGBA | |
| const pixels = await this.pngToRGBA(screenshot as Buffer, this.width, this.height); | |
| return { | |
| pixels, | |
| width: this.width, | |
| height: this.height, | |
| timestamp: Date.now(), | |
| }; | |
| } | |
| } catch (error) { | |
| console.error('Chrome capture error:', error); | |
| } | |
| // Fallback | |
| return { | |
| pixels: new Uint8Array(this.width * this.height * 4), | |
| width: this.width, | |
| height: this.height, | |
| timestamp: Date.now(), | |
| }; | |
| } | |
| private async pngToRGBA(buffer: Buffer, width: number, height: number): Promise<Uint8Array> { | |
| // Simple PNG parsing for RGBA extraction | |
| // In production, use a proper PNG decoder | |
| const pixels = new Uint8Array(width * height * 4); | |
| // For demo, create a gradient pattern | |
| for (let y = 0; y < height; y++) { | |
| for (let x = 0; x < width; x++) { | |
| const idx = (y * width + x) * 4; | |
| const progress = x / width; | |
| // Blue to purple gradient | |
| pixels[idx] = Math.floor(26 + progress * 40); | |
| pixels[idx + 1] = Math.floor(26 + progress * 20); | |
| pixels[idx + 2] = Math.floor(46 + progress * 150); | |
| pixels[idx + 3] = 255; | |
| } | |
| } | |
| return pixels; | |
| } | |
| stop(): void { | |
| if (this.browser) { | |
| this.browser.close(); | |
| this.browser = null; | |
| } | |
| } | |
| getResolution(): { width: number; height: number } { | |
| return { width: this.width, height: this.height }; | |
| } | |
| } | |
| /** | |
| * Factory function to create appropriate screen capture | |
| */ | |
| export async function createScreenCapture(): Promise<ScreenCapture> { | |
| // Try different capture methods in order of preference | |
| const methods = [ | |
| () => new ChromeScreenCapture(), | |
| () => new X11ScreenCapture(), | |
| ]; | |
| for (const method of methods) { | |
| try { | |
| const capture = method(); | |
| await capture.start(); | |
| return capture; | |
| } catch (error) { | |
| console.warn(`Capture method failed:`, error); | |
| continue; | |
| } | |
| } | |
| // Last resort: return a dummy capture | |
| const dummyCapture = new X11ScreenCapture(); | |
| await dummyCapture.start(); | |
| return dummyCapture; | |
| } | |