yusef75 committed on
Commit
033af1d
·
verified ·
1 Parent(s): fe1f47a

Upload 2 files

Browse files
Files changed (2) hide show
  1. browser-agent.js +277 -0
  2. index.js +395 -359
browser-agent.js ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Browser Agent - AI-Powered Browser Automation with Vision
3
+ * Uses Qwen VL to see screenshots and decide actions like a human
4
+ */
5
+
6
+ // Vision Model Configuration
7
+ export const VISION_MODEL = 'qwen/qwen-2.5-vl-7b-instruct:free';
8
+
9
/**
 * Collect the OpenRouter API keys configured in the environment.
 *
 * Reads OPENROUTER_API_KEY_1 … OPENROUTER_API_KEY_10 in order, then the
 * un-numbered OPENROUTER_API_KEY. Values are trimmed, blank values are
 * skipped, and a key that appears under several names is returned once so
 * callers do not retry a key that has already failed.
 *
 * @returns {string[]} Usable keys in lookup order (possibly empty).
 */
function getOpenRouterKeys() {
  const variableNames = [
    ...Array.from({ length: 10 }, (_, i) => `OPENROUTER_API_KEY_${i + 1}`),
    'OPENROUTER_API_KEY',
  ];

  // A Set keeps insertion order and drops duplicates.
  const keys = new Set();
  for (const name of variableNames) {
    const value = process.env[name]?.trim();
    if (value) keys.add(value);
  }
  return [...keys];
}
22
+
23
/**
 * Ask the vision model what the agent should do next.
 *
 * Sends the screenshot and a text prompt (task, current URL, earlier steps)
 * to OpenRouter's chat-completions endpoint, trying each configured API key
 * in turn until one yields a JSON decision.
 *
 * @param {string} screenshotBase64 - PNG screenshot, base64-encoded.
 * @param {string} task - Natural-language task the agent is working on.
 * @param {Array<{action?: {type?: string, description?: string}}>} [previousSteps=[]]
 *   Steps taken so far; only each step's action type/description is used.
 * @param {string} [currentUrl=''] - URL of the page in the screenshot.
 * @returns {Promise<object>} The decision object parsed from the model's
 *   reply, or a `{ action: { type: 'wait' }, taskComplete: false }` fallback
 *   when no key is configured or every key failed. Never rejects because of
 *   a network or parse error.
 */
async function analyzeWithVision(screenshotBase64, task, previousSteps = [], currentUrl = '') {
  // A step can be recorded without an action (the model omitted it), so read
  // it defensively instead of throwing while building the prompt.
  const stepHistory = previousSteps
    .map((s, i) => `Step ${i + 1}: ${s?.action?.type ?? 'unknown'} - ${s?.action?.description || ''}`)
    .join('\n');

  const prompt = `You are a browser automation agent. You can see the current webpage screenshot.

TASK: ${task}

CURRENT URL: ${currentUrl}

PREVIOUS STEPS:
${stepHistory || 'None yet'}

Based on what you see in the screenshot, decide the NEXT ACTION to complete the task.

Respond in this exact JSON format:
{
  "observation": "Brief description of what you see on the page",
  "thinking": "Your reasoning about what to do next",
  "action": {
    "type": "click" | "type" | "scroll" | "goto" | "wait" | "done",
    "x": 500,
    "y": 300,
    "text": "text to type if action is type",
    "url": "url if action is goto",
    "direction": "up or down if scroll",
    "description": "human readable description of this action"
  },
  "taskComplete": false,
  "result": "Only fill this if taskComplete is true - the final answer/result"
}

IMPORTANT:
- If you see search results with the information needed, extract it and set taskComplete: true
- For click actions, estimate x,y coordinates based on where you see the element
- If you see a search box, type the search query
- If the page needs to scroll to see more, use scroll action
- Be efficient - don't take unnecessary steps`;

  // Shape returned whenever no real decision could be obtained.
  const waitFallback = (observation, thinking, description) => ({
    observation,
    thinking,
    action: { type: 'wait', description },
    taskComplete: false,
  });

  const keys = getOpenRouterKeys();
  if (keys.length === 0) {
    console.error('[BrowserAgent] No OpenRouter API keys found!');
    return waitFallback('No API keys configured', 'Cannot analyze without API keys', 'Waiting - no API keys');
  }

  // The payload is identical for every key, so serialise it once.
  const requestBody = JSON.stringify({
    model: VISION_MODEL,
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshotBase64}` } },
      ],
    }],
    max_tokens: 1000,
  });
  const requestTimeoutMs = 60000;

  // Try each key until one works.
  for (const apiKey of keys) {
    try {
      const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://luks-pied.vercel.app',
          'X-Title': 'Lukas Browser Agent',
        },
        body: requestBody,
        // Without a deadline a stalled request would hang the whole agent loop.
        signal: AbortSignal.timeout(requestTimeoutMs),
      });

      if (response.status === 429) {
        console.log('[BrowserAgent] Rate limited, trying next key...');
        continue;
      }

      const data = await response.json();

      const content = data?.choices?.[0]?.message?.content;
      if (typeof content === 'string') {
        // Extract JSON from the reply (the model may wrap it in a markdown code block).
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
          // A parse failure lands in the catch below and moves on to the next key.
          return JSON.parse(jsonMatch[0]);
        }
      }

      if (data?.error) {
        console.log(`[BrowserAgent] API error: ${data.error.message}, trying next key...`);
        continue;
      }

      if (!response.ok) {
        console.log(`[BrowserAgent] HTTP ${response.status}, trying next key...`);
        continue;
      }

      console.log('[BrowserAgent] No JSON decision in model reply, trying next key...');
    } catch (error) {
      console.log(`[BrowserAgent] Key failed: ${error.message}, trying next...`);
    }
  }

  console.error('[BrowserAgent] All API keys failed');
  return waitFallback('Error analyzing screenshot', 'All API keys failed', 'Waiting due to error');
}
140
+
141
/**
 * Execute one agent action against a Playwright-style page.
 *
 * Supported `action.type` values: `click` (needs x, y), `type` (needs text;
 * clicks x, y first when given, then presses Enter), `scroll` (400px down
 * unless direction is `'up'`), `goto` (needs url), `wait`, and `done` (no-op).
 *
 * @param {object} page - Object exposing `mouse`, `keyboard`, `goto` and
 *   `waitForTimeout` as used below.
 * @param {object} action - Action description produced by the vision model.
 * @returns {Promise<boolean>} `true` when the action was carried out (or was
 *   `done`); `false` when it was unknown, lacked required fields, or the page
 *   call threw. Never rejects.
 */
async function executeAction(page, action) {
  // Models sometimes return numbers as strings; accept both, reject the rest.
  const toCoordinate = (value) => {
    if (typeof value === 'number') return value;
    if (typeof value === 'string' && value.trim() !== '') return Number(value);
    return Number.NaN;
  };
  const x = toCoordinate(action?.x);
  const y = toCoordinate(action?.y);
  // 0 is a valid coordinate, so test for finiteness rather than truthiness.
  const hasPoint = Number.isFinite(x) && Number.isFinite(y);

  try {
    switch (action?.type) {
      case 'click':
        if (!hasPoint) throw new Error('click requires numeric x and y');
        await page.mouse.click(x, y);
        await page.waitForTimeout(1000);
        break;

      case 'type':
        if (typeof action.text !== 'string') throw new Error('type requires text');
        if (hasPoint) {
          // Focus the target field before typing.
          await page.mouse.click(x, y);
          await page.waitForTimeout(300);
        }
        await page.keyboard.type(action.text, { delay: 50 });
        await page.keyboard.press('Enter');
        await page.waitForTimeout(2000);
        break;

      case 'scroll': {
        const amount = action.direction === 'up' ? -400 : 400;
        await page.mouse.wheel(0, amount);
        await page.waitForTimeout(500);
        break;
      }

      case 'goto':
        if (typeof action.url !== 'string' || action.url.trim() === '') {
          throw new Error('goto requires a url');
        }
        await page.goto(action.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await page.waitForTimeout(2000);
        break;

      case 'wait':
        await page.waitForTimeout(2000);
        break;

      case 'done':
        // Task is complete, no action needed.
        break;

      default:
        // Nothing was executed, so do not report success.
        console.log('[BrowserAgent] Unknown action:', action?.type);
        return false;
    }
    return true;
  } catch (error) {
    console.error('[BrowserAgent] Action execution error:', error.message);
    return false;
  }
}
190
+
191
/**
 * Run the browser agent loop: screenshot → vision analysis → action.
 *
 * Starts at https://www.google.com, then repeats up to `maxSteps` times.
 * Each iteration captures a PNG screenshot, asks `analyzeWithVision` for a
 * decision, records the step, emits an `agent:step` event on `socket` (when
 * given) and executes the chosen action. The loop ends when the model sets
 * `taskComplete` or chooses the `done` action.
 *
 * @param {object} page - Playwright-style page to drive.
 * @param {string} task - Natural-language task.
 * @param {object|null} socket - Optional emitter with `emit(event, payload)`.
 * @param {number} [maxSteps=10] - Step budget; non-positive or non-integer
 *   values fall back to 10.
 * @returns {Promise<{success: boolean, steps: object[], result: *,
 *   finalScreenshot: string, totalSteps: number}>} Outcome of the run. Each
 *   step holds its base64 screenshot and, once its action ran, an
 *   `actionSucceeded` flag.
 */
async function runBrowserAgent(page, task, socket, maxSteps = 10) {
  console.log(`[BrowserAgent] Starting task: "${task}"`);

  const stepLimit = Number.isInteger(maxSteps) && maxSteps > 0 ? maxSteps : 10;
  const steps = [];
  let taskComplete = false;
  let finalResult = null;

  // Start by going to Google.
  await page.goto('https://www.google.com', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);

  for (let i = 0; i < stepLimit && !taskComplete; i++) {
    console.log(`[BrowserAgent] Step ${i + 1}/${stepLimit}`);

    // 1. Capture what the model will "see".
    const screenshotBuffer = await page.screenshot({ type: 'png' });
    const screenshotBase64 = screenshotBuffer.toString('base64');
    const currentUrl = page.url();

    // 2. Ask the vision model for the next move.
    console.log('[BrowserAgent] Analyzing screenshot with Vision AI...');
    const analysis = (await analyzeWithVision(screenshotBase64, task, steps, currentUrl)) ?? {};
    const action = analysis.action;

    console.log(`[BrowserAgent] Observation: ${analysis.observation}`);
    console.log(`[BrowserAgent] Thinking: ${analysis.thinking}`);
    console.log(`[BrowserAgent] Action: ${action?.type} - ${action?.description}`);

    // 3. Record the step.
    const step = {
      stepNumber: i + 1,
      screenshot: screenshotBase64,
      observation: analysis.observation,
      thinking: analysis.thinking,
      action,
      timestamp: Date.now(),
    };
    steps.push(step);

    // 4. Send the update to the frontend.
    if (socket) {
      socket.emit('agent:step', {
        step: i + 1,
        total: stepLimit,
        screenshot: screenshotBase64,
        observation: analysis.observation,
        action: action?.description || action?.type,
        taskComplete: analysis.taskComplete,
      });
    }

    // 5. Stop when the model says the task is finished. A `done` action counts
    //    too; otherwise the loop would burn the rest of the step budget on
    //    identical screenshots.
    if (analysis.taskComplete || action?.type === 'done') {
      taskComplete = true;
      finalResult = analysis.result ?? null;
      console.log('[BrowserAgent] Task completed!');
      console.log('[BrowserAgent] Result:', finalResult);
      break;
    }

    // 6. Execute the action and keep the outcome with the step.
    if (action) {
      step.actionSucceeded = await executeAction(page, action);
      if (!step.actionSucceeded) {
        console.log(`[BrowserAgent] Step ${i + 1} action failed: ${action.type}`);
      }
    }
  }

  // Take the final screenshot.
  const finalScreenshot = await page.screenshot({ type: 'png' });

  return {
    success: taskComplete,
    steps,
    result: finalResult,
    finalScreenshot: finalScreenshot.toString('base64'),
    totalSteps: steps.length,
  };
}
272
+
273
+ export {
274
+ runBrowserAgent,
275
+ analyzeWithVision,
276
+ executeAction
277
+ };
index.js CHANGED
@@ -1,359 +1,395 @@
1
- /**
2
- * Lukas Worker - The Muscles
3
- * Browser automation server with Socket.io for real-time control and streaming
4
- * Deploy this to Hugging Face Spaces as a Docker container
5
- */
6
-
7
- import express from 'express';
8
- import { createServer } from 'http';
9
- import { Server } from 'socket.io';
10
- import { chromium } from 'playwright';
11
- import dotenv from 'dotenv';
12
-
13
- dotenv.config();
14
-
15
- const PORT = process.env.PORT || 7860;
16
- const WORKER_SECRET = process.env.WORKER_SECRET || 'lukas-dev-secret';
17
-
18
- const app = express();
19
- const httpServer = createServer(app);
20
-
21
- // Socket.io server with CORS for Vercel
22
- const io = new Server(httpServer, {
23
- cors: {
24
- origin: ['https://luks-pied.vercel.app', 'http://localhost:5173', 'http://localhost:3000'],
25
- methods: ['GET', 'POST'],
26
- credentials: true
27
- },
28
- transports: ['websocket', 'polling']
29
- });
30
-
31
- // Health check endpoint (Required for Hugging Face)
32
- app.get('/', (req, res) => {
33
- res.json({
34
- status: 'ok',
35
- service: 'Lukas Worker (The Muscles)',
36
- version: '1.0.0',
37
- ready: true
38
- });
39
- });
40
-
41
- app.get('/health', (req, res) => {
42
- res.json({ status: 'healthy', timestamp: new Date().toISOString() });
43
- });
44
-
45
- // =============================================================================
46
- // BROWSER MANAGEMENT
47
- // =============================================================================
48
-
49
- let browser = null;
50
- let browserContext = null;
51
- let activePage = null;
52
- let streamInterval = null;
53
- let connectedClient = null;
54
-
55
- async function initBrowser() {
56
- if (browser) return;
57
-
58
- console.log('πŸš€ Launching browser...');
59
- browser = await chromium.launch({
60
- headless: true,
61
- args: [
62
- '--no-sandbox',
63
- '--disable-setuid-sandbox',
64
- '--disable-dev-shm-usage',
65
- '--disable-accelerated-2d-canvas',
66
- '--no-first-run',
67
- '--no-zygote',
68
- '--disable-gpu'
69
- ]
70
- });
71
-
72
- browserContext = await browser.newContext({
73
- viewport: { width: 1280, height: 720 },
74
- userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
75
- });
76
-
77
- activePage = await browserContext.newPage();
78
- console.log('βœ… Browser ready');
79
- }
80
-
81
- async function closeBrowser() {
82
- if (browser) {
83
- await browser.close();
84
- browser = null;
85
- browserContext = null;
86
- activePage = null;
87
- console.log('πŸ”΄ Browser closed');
88
- }
89
- }
90
-
91
- // =============================================================================
92
- // STREAMING
93
- // =============================================================================
94
-
95
- async function startStreaming(socket) {
96
- if (streamInterval) clearInterval(streamInterval);
97
- if (!activePage) return;
98
-
99
- console.log('πŸ“Ί Starting live stream...');
100
-
101
- streamInterval = setInterval(async () => {
102
- try {
103
- if (!activePage) return;
104
-
105
- const screenshot = await activePage.screenshot({
106
- type: 'jpeg',
107
- quality: 60,
108
- fullPage: false
109
- });
110
-
111
- const base64 = screenshot.toString('base64');
112
- socket.emit('stream:frame', { image: base64 });
113
- } catch (error) {
114
- // Page might be navigating, ignore errors
115
- }
116
- }, 200); // ~5 FPS for smooth streaming
117
- }
118
-
119
- function stopStreaming() {
120
- if (streamInterval) {
121
- clearInterval(streamInterval);
122
- streamInterval = null;
123
- console.log('πŸ“Ί Stream stopped');
124
- }
125
- }
126
-
127
- // =============================================================================
128
- // SOCKET HANDLERS
129
- // =============================================================================
130
-
131
- io.use((socket, next) => {
132
- const token = socket.handshake.auth?.token;
133
-
134
- if (token === WORKER_SECRET) {
135
- console.log('βœ… Client authenticated');
136
- next();
137
- } else {
138
- console.log('❌ Authentication failed');
139
- next(new Error('Authentication failed'));
140
- }
141
- });
142
-
143
- io.on('connection', async (socket) => {
144
- console.log('πŸ”— Client connected:', socket.id);
145
-
146
- // Only allow one client at a time
147
- if (connectedClient && connectedClient !== socket.id) {
148
- socket.emit('error', { message: 'Another client is already connected' });
149
- socket.disconnect();
150
- return;
151
- }
152
-
153
- connectedClient = socket.id;
154
-
155
- // Initialize browser on first connection
156
- await initBrowser();
157
-
158
- // Start streaming automatically
159
- startStreaming(socket);
160
-
161
- // =========================================================================
162
- // COMMAND HANDLERS
163
- // =========================================================================
164
-
165
- socket.on('browser:goto', async (data, callback) => {
166
- try {
167
- const { url } = data;
168
- console.log(`🌐 Navigating to: ${url}`);
169
-
170
- await activePage.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
171
-
172
- const title = await activePage.title();
173
- callback({ success: true, title });
174
- } catch (error) {
175
- console.error('❌ Navigation error:', error.message);
176
- callback({ success: false, error: error.message });
177
- }
178
- });
179
-
180
- socket.on('browser:click', async (data, callback) => {
181
- try {
182
- const { selector, x, y } = data;
183
-
184
- if (x !== undefined && y !== undefined) {
185
- // Click by coordinates
186
- console.log(`πŸ–±οΈ Clicking at coordinates: (${x}, ${y})`);
187
- await activePage.mouse.click(x, y);
188
- } else if (selector) {
189
- // Click by selector
190
- console.log(`πŸ–±οΈ Clicking selector: ${selector}`);
191
- await activePage.click(selector, { timeout: 10000 });
192
- } else {
193
- throw new Error('Either selector or x,y coordinates required');
194
- }
195
-
196
- callback({ success: true });
197
- } catch (error) {
198
- console.error('❌ Click error:', error.message);
199
- callback({ success: false, error: error.message });
200
- }
201
- });
202
-
203
- socket.on('browser:type', async (data, callback) => {
204
- try {
205
- const { selector, text } = data;
206
-
207
- if (selector) {
208
- // Type into specific element
209
- console.log(`⌨️ Typing in selector: ${selector}`);
210
- await activePage.fill(selector, text);
211
- } else {
212
- // Type using keyboard (to focused element)
213
- console.log(`⌨️ Typing text: ${text.substring(0, 20)}...`);
214
- await activePage.keyboard.type(text, { delay: 30 });
215
- }
216
-
217
- callback({ success: true });
218
- } catch (error) {
219
- console.error('❌ Type error:', error.message);
220
- callback({ success: false, error: error.message });
221
- }
222
- });
223
-
224
- socket.on('browser:scroll', async (data, callback) => {
225
- try {
226
- const { direction = 'down', amount = 500 } = data;
227
- console.log(`πŸ“œ Scrolling ${direction}`);
228
-
229
- await activePage.evaluate((dir, amt) => {
230
- window.scrollBy(0, dir === 'down' ? amt : -amt);
231
- }, direction, amount);
232
-
233
- callback({ success: true });
234
- } catch (error) {
235
- callback({ success: false, error: error.message });
236
- }
237
- });
238
-
239
- socket.on('browser:screenshot', async (data, callback) => {
240
- try {
241
- console.log('πŸ“Έ Taking screenshot...');
242
-
243
- const screenshot = await activePage.screenshot({
244
- type: 'png',
245
- fullPage: data?.fullPage || false
246
- });
247
-
248
- callback({ success: true, image: screenshot.toString('base64') });
249
- } catch (error) {
250
- callback({ success: false, error: error.message });
251
- }
252
- });
253
-
254
- socket.on('browser:getContent', async (data, callback) => {
255
- try {
256
- console.log('πŸ“„ Getting page content...');
257
-
258
- const content = await activePage.content();
259
- const title = await activePage.title();
260
- const url = activePage.url();
261
-
262
- // Get text content for AI analysis
263
- const textContent = await activePage.evaluate(() => {
264
- return document.body.innerText.substring(0, 10000);
265
- });
266
-
267
- callback({ success: true, content, title, url, textContent });
268
- } catch (error) {
269
- callback({ success: false, error: error.message });
270
- }
271
- });
272
-
273
- socket.on('browser:getAccessibility', async (data, callback) => {
274
- try {
275
- console.log('🌳 Getting accessibility tree...');
276
-
277
- const tree = await activePage.accessibility.snapshot();
278
- callback({ success: true, tree });
279
- } catch (error) {
280
- callback({ success: false, error: error.message });
281
- }
282
- });
283
-
284
- socket.on('browser:execute', async (data, callback) => {
285
- try {
286
- const { action, params } = data;
287
- console.log(`⚑ Executing action: ${action}`);
288
-
289
- let result = null;
290
-
291
- switch (action) {
292
- case 'waitForSelector':
293
- await activePage.waitForSelector(params.selector, { timeout: params.timeout || 10000 });
294
- result = { found: true };
295
- break;
296
-
297
- case 'pressKey':
298
- await activePage.keyboard.press(params.key);
299
- result = { pressed: params.key };
300
- break;
301
-
302
- case 'goBack':
303
- await activePage.goBack();
304
- result = { navigated: true };
305
- break;
306
-
307
- case 'goForward':
308
- await activePage.goForward();
309
- result = { navigated: true };
310
- break;
311
-
312
- case 'reload':
313
- await activePage.reload();
314
- result = { reloaded: true };
315
- break;
316
-
317
- default:
318
- throw new Error(`Unknown action: ${action}`);
319
- }
320
-
321
- callback({ success: true, result });
322
- } catch (error) {
323
- callback({ success: false, error: error.message });
324
- }
325
- });
326
-
327
- // =========================================================================
328
- // DISCONNECT HANDLER
329
- // =========================================================================
330
-
331
- socket.on('disconnect', () => {
332
- console.log('πŸ”Œ Client disconnected:', socket.id);
333
- stopStreaming();
334
- connectedClient = null;
335
-
336
- // Don't close browser immediately, keep it warm for reconnection
337
- // closeBrowser();
338
- });
339
- });
340
-
341
- // =============================================================================
342
- // START SERVER
343
- // =============================================================================
344
-
345
- httpServer.listen(PORT, '0.0.0.0', () => {
346
- console.log('═══════════════════════════════════════════════════════════════');
347
- console.log(` 🦾 Lukas Worker (The Muscles) is running`);
348
- console.log(` πŸ“‘ Socket.io server: http://0.0.0.0:${PORT}`);
349
- console.log(` πŸ” Secret required for connection`);
350
- console.log('═══════════════════════════════════════════════════════════════');
351
- });
352
-
353
- // Graceful shutdown
354
- process.on('SIGTERM', async () => {
355
- console.log('πŸ›‘ Shutting down...');
356
- stopStreaming();
357
- await closeBrowser();
358
- process.exit(0);
359
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Lukas Worker - The Muscles
3
+ * Browser automation server with Socket.io for real-time control and streaming
4
+ * Deploy this to Hugging Face Spaces as a Docker container
5
+ */
6
+
7
+ import express from 'express';
8
+ import { createServer } from 'http';
9
+ import { Server } from 'socket.io';
10
+ import { chromium } from 'playwright';
11
+ import dotenv from 'dotenv';
12
+ import { runBrowserAgent } from './browser-agent.js';
13
+
14
+ dotenv.config();
15
+
16
+ const PORT = process.env.PORT || 7860;
17
+ const WORKER_SECRET = process.env.WORKER_SECRET || 'lukas-dev-secret';
18
+
19
+ const app = express();
20
+ const httpServer = createServer(app);
21
+
22
+ // Socket.io server with CORS for Vercel
23
+ const io = new Server(httpServer, {
24
+ cors: {
25
+ origin: ['https://luks-pied.vercel.app', 'http://localhost:5173', 'http://localhost:3000'],
26
+ methods: ['GET', 'POST'],
27
+ credentials: true
28
+ },
29
+ transports: ['websocket', 'polling']
30
+ });
31
+
32
+ // Health check endpoint (Required for Hugging Face)
33
+ app.get('/', (req, res) => {
34
+ res.json({
35
+ status: 'ok',
36
+ service: 'Lukas Worker (The Muscles)',
37
+ version: '1.0.0',
38
+ ready: true
39
+ });
40
+ });
41
+
42
+ app.get('/health', (req, res) => {
43
+ res.json({ status: 'healthy', timestamp: new Date().toISOString() });
44
+ });
45
+
46
+ // =============================================================================
47
+ // BROWSER MANAGEMENT
48
+ // =============================================================================
49
+
50
+ let browser = null;
51
+ let browserContext = null;
52
+ let activePage = null;
53
+ let streamInterval = null;
54
+ let connectedClient = null;
55
+
56
// In-flight initialisation, shared so overlapping callers launch only once.
let browserInitPromise = null;

/**
 * Ensure a Chromium instance, a context and an open page exist.
 *
 * Populates the module-level `browser`, `browserContext` and `activePage`.
 * Returns immediately when a connected browser with an open page is already
 * available. If set-up fails part-way, everything is torn down again so the
 * next call starts from scratch instead of finding a browser without a page.
 *
 * @returns {Promise<void>} Resolves once `activePage` is usable.
 */
async function initBrowser() {
  // A crashed or closed browser must not count as "already initialised".
  if (browser && !browser.isConnected()) {
    browser = null;
    browserContext = null;
    activePage = null;
  }
  if (browser && activePage && !activePage.isClosed()) return;
  if (browserInitPromise) return browserInitPromise;

  browserInitPromise = (async () => {
    console.log('🚀 Launching browser...');
    if (!browser) {
      browser = await chromium.launch({
        headless: true,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--disable-gpu',
        ],
      });
      browserContext = null;
    }

    try {
      if (!browserContext) {
        browserContext = await browser.newContext({
          viewport: { width: 1280, height: 720 },
          userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        });
      }
      activePage = await browserContext.newPage();
    } catch (error) {
      // Do not leave a half-initialised browser behind.
      const failed = browser;
      browser = null;
      browserContext = null;
      activePage = null;
      await failed.close().catch(() => {});
      throw error;
    }
    console.log('✅ Browser ready');
  })();

  try {
    await browserInitPromise;
  } finally {
    browserInitPromise = null;
  }
}
81
+
82
/**
 * Close the shared browser, if one is open, and clear the module-level
 * `browser`, `browserContext` and `activePage` references.
 *
 * The references are cleared before closing so that no other code picks up a
 * page that is being torn down, and so the state is reset even if `close()`
 * rejects.
 *
 * @returns {Promise<void>} Rejects with the error from `browser.close()`, if any.
 */
async function closeBrowser() {
  if (!browser) return;

  const closing = browser;
  browser = null;
  browserContext = null;
  activePage = null;

  await closing.close();
  console.log('🔴 Browser closed');
}
91
+
92
+ // =============================================================================
93
+ // STREAMING
94
+ // =============================================================================
95
+
96
/**
 * Start pushing JPEG frames of `activePage` to a client.
 *
 * Replaces any stream already running. Every 200 ms a screenshot is taken
 * and emitted as `stream:frame` with `{ image: <base64> }`. A tick is
 * skipped while the previous capture is still pending, so slow screenshots
 * cannot pile up.
 *
 * @param {object} socket - Socket.io socket that receives the frames.
 * @returns {Promise<void>}
 */
async function startStreaming(socket) {
  if (streamInterval) {
    clearInterval(streamInterval);
    streamInterval = null;
  }
  if (!activePage) return;

  console.log('📺 Starting live stream...');

  let capturing = false;
  streamInterval = setInterval(async () => {
    if (capturing || !activePage) return;
    capturing = true;
    try {
      const frame = await activePage.screenshot({
        type: 'jpeg',
        quality: 60,
        fullPage: false,
      });
      socket.emit('stream:frame', { image: frame.toString('base64') });
    } catch (error) {
      // Page might be navigating; dropping a frame is fine.
    } finally {
      capturing = false;
    }
  }, 200); // ~5 FPS
}
119
+
120
/**
 * Stop the frame stream started by `startStreaming`, if one is running.
 * Safe to call when no stream is active.
 */
function stopStreaming() {
  if (!streamInterval) return;

  clearInterval(streamInterval);
  streamInterval = null;
  console.log('📺 Stream stopped');
}
127
+
128
+ // =============================================================================
129
+ // SOCKET HANDLERS
130
+ // =============================================================================
131
+
132
// Handshake gate: a client must send `auth.token` equal to WORKER_SECRET.
// Anything else (missing or wrong token) is rejected before the
// 'connection' handler runs.
io.use((socket, next) => {
  const token = socket.handshake.auth?.token;

  if (token !== WORKER_SECRET) {
    console.log('❌ Authentication failed');
    next(new Error('Authentication failed'));
    return;
  }

  console.log('✅ Client authenticated');
  next();
});
143
+
144
+ io.on('connection', async (socket) => {
145
+ console.log('πŸ”— Client connected:', socket.id);
146
+
147
+ // Only allow one client at a time
148
+ if (connectedClient && connectedClient !== socket.id) {
149
+ socket.emit('error', { message: 'Another client is already connected' });
150
+ socket.disconnect();
151
+ return;
152
+ }
153
+
154
+ connectedClient = socket.id;
155
+
156
+ // Initialize browser on first connection
157
+ await initBrowser();
158
+
159
+ // Start streaming automatically
160
+ startStreaming(socket);
161
+
162
+ // =========================================================================
163
+ // COMMAND HANDLERS
164
+ // =========================================================================
165
+
166
// Navigate the shared page to `data.url`; acks `{ success, title }` or
// `{ success: false, error }`.
socket.on('browser:goto', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    const url = data?.url;
    if (typeof url !== 'string' || url.trim() === '') {
      throw new Error('url is required');
    }
    console.log(`🌐 Navigating to: ${url}`);

    await activePage.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

    const title = await activePage.title();
    reply({ success: true, title });
  } catch (error) {
    console.error('❌ Navigation error:', error.message);
    reply({ success: false, error: error.message });
  }
});
180
+
181
// Click either at viewport coordinates (`x`, `y`) or on a `selector`;
// coordinates win when both are supplied.
socket.on('browser:click', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    const { selector, x, y } = data ?? {};

    if (x !== undefined && y !== undefined) {
      console.log(`🖱️ Clicking at coordinates: (${x}, ${y})`);
      await activePage.mouse.click(x, y);
    } else if (selector) {
      console.log(`🖱️ Clicking selector: ${selector}`);
      await activePage.click(selector, { timeout: 10000 });
    } else {
      throw new Error('Either selector or x,y coordinates required');
    }

    reply({ success: true });
  } catch (error) {
    console.error('❌ Click error:', error.message);
    reply({ success: false, error: error.message });
  }
});
203
+
204
// Enter `data.text`: fill `data.selector` when given, otherwise type into
// whatever element currently has focus.
socket.on('browser:type', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    const { selector, text } = data ?? {};
    if (typeof text !== 'string') {
      throw new Error('text is required');
    }

    if (selector) {
      console.log(`⌨️ Typing in selector: ${selector}`);
      await activePage.fill(selector, text);
    } else {
      console.log(`⌨️ Typing text: ${text.substring(0, 20)}...`);
      await activePage.keyboard.type(text, { delay: 30 });
    }

    reply({ success: true });
  } catch (error) {
    console.error('❌ Type error:', error.message);
    reply({ success: false, error: error.message });
  }
});
224
+
225
// Scroll the page vertically by `data.amount` pixels (default 500) in
// `data.direction` ('down' by default; any other value scrolls up).
socket.on('browser:scroll', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    const { direction = 'down', amount = 500 } = data ?? {};
    const distance = Number(amount);
    if (!Number.isFinite(distance)) {
      throw new Error('amount must be a number');
    }
    console.log(`📜 Scrolling ${direction}`);

    // Playwright's evaluate() forwards exactly ONE argument to the page
    // function, so both values travel in a single object.
    await activePage.evaluate(({ dir, amt }) => {
      window.scrollBy(0, dir === 'down' ? amt : -amt);
    }, { dir: direction, amt: distance });

    reply({ success: true });
  } catch (error) {
    reply({ success: false, error: error.message });
  }
});
239
+
240
// Capture a PNG of the page (whole page when `data.fullPage` is truthy) and
// ack it as base64 in `image`.
socket.on('browser:screenshot', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    console.log('📸 Taking screenshot...');

    const shot = await activePage.screenshot({
      type: 'png',
      fullPage: Boolean(data?.fullPage),
    });

    reply({ success: true, image: shot.toString('base64') });
  } catch (error) {
    reply({ success: false, error: error.message });
  }
});
254
+
255
// Ack the page's HTML, title, URL and the first 10,000 characters of its
// visible text (intended for AI analysis).
socket.on('browser:getContent', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    console.log('📄 Getting page content...');

    const content = await activePage.content();
    const title = await activePage.title();
    const url = activePage.url();

    // A document without a <body> yields an empty string instead of throwing.
    const textContent = await activePage.evaluate(
      () => (document.body?.innerText ?? '').substring(0, 10000),
    );

    reply({ success: true, content, title, url, textContent });
  } catch (error) {
    reply({ success: false, error: error.message });
  }
});
273
+
274
// Ack a snapshot of the page's accessibility tree in `tree`.
socket.on('browser:getAccessibility', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    console.log('🌳 Getting accessibility tree...');

    const tree = await activePage.accessibility.snapshot();
    reply({ success: true, tree });
  } catch (error) {
    reply({ success: false, error: error.message });
  }
});
284
+
285
// Run one of a fixed set of page operations named by `data.action`:
// waitForSelector, pressKey, goBack, goForward, reload.
socket.on('browser:execute', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    // `params` is optional for the navigation actions, so default it.
    const { action, params = {} } = data ?? {};
    console.log(`⚡ Executing action: ${action}`);

    let result = null;

    switch (action) {
      case 'waitForSelector':
        await activePage.waitForSelector(params.selector, { timeout: params.timeout || 10000 });
        result = { found: true };
        break;

      case 'pressKey':
        await activePage.keyboard.press(params.key);
        result = { pressed: params.key };
        break;

      case 'goBack':
        await activePage.goBack();
        result = { navigated: true };
        break;

      case 'goForward':
        await activePage.goForward();
        result = { navigated: true };
        break;

      case 'reload':
        await activePage.reload();
        result = { reloaded: true };
        break;

      default:
        throw new Error(`Unknown action: ${action}`);
    }

    reply({ success: true, result });
  } catch (error) {
    reply({ success: false, error: error.message });
  }
});
327
+
328
// =========================================================================
// BROWSER AGENT (AI-POWERED)
// =========================================================================

// Run the vision-driven agent on `data.task` and ack a summary: success
// flag, final result, per-step observation/action, and a final screenshot.
socket.on('browser:agent', async (data, callback) => {
  // The client may emit without an ack; never let that crash the handler.
  const reply = typeof callback === 'function' ? callback : () => {};
  try {
    const { task, maxSteps = 10 } = data ?? {};
    if (typeof task !== 'string' || task.trim() === '') {
      throw new Error('task is required');
    }

    // Every step costs a screenshot plus a vision-model request, so the
    // client-supplied budget is clamped to 1..30 (default 10).
    const requestedSteps = Number.parseInt(maxSteps, 10);
    const stepLimit = Number.isNaN(requestedSteps)
      ? 10
      : Math.min(Math.max(requestedSteps, 1), 30);

    console.log('🤖 [Agent] Starting AI Browser Agent...');
    console.log(`🎯 [Agent] Task: "${task}"`);

    if (!activePage) {
      await initBrowser();
    }

    // Run the browser agent with Vision AI.
    const result = await runBrowserAgent(activePage, task, socket, stepLimit);

    console.log(`✅ [Agent] Completed in ${result.totalSteps} steps`);
    reply({
      success: result.success,
      result: result.result,
      // Per-step screenshots were already streamed via 'agent:step'; leave
      // them out of the ack to keep it small.
      steps: result.steps.map((s) => ({
        stepNumber: s.stepNumber,
        observation: s.observation,
        action: s.action?.description || s.action?.type,
      })),
      finalScreenshot: result.finalScreenshot,
      totalSteps: result.totalSteps,
    });
  } catch (error) {
    console.error('❌ [Agent] Error:', error.message);
    reply({ success: false, error: error.message });
  }
});

// =========================================================================
// DISCONNECT HANDLER
// =========================================================================

socket.on('disconnect', () => {
  console.log('🔌 Client disconnected:', socket.id);
  stopStreaming();
  // Only release the single-client slot if this socket is the one holding it.
  if (connectedClient === socket.id) {
    connectedClient = null;
  }

  // The browser is deliberately kept open so a reconnecting client does not
  // pay the launch cost again.
});
375
+ });
376
+
377
+ // =============================================================================
378
+ // START SERVER
379
+ // =============================================================================
380
+
381
// Bind on all interfaces and print a start-up banner.
httpServer.listen(PORT, '0.0.0.0', () => {
  const rule = '═══════════════════════════════════════════════════════════════';
  console.log(rule);
  console.log(` 🦾 Lukas Worker (The Muscles) is running`);
  console.log(` 📡 Socket.io server: http://0.0.0.0:${PORT}`);
  console.log(` 🔐 Secret required for connection`);
  console.log(rule);
});
388
+
389
// Graceful shutdown: stop the frame stream, close the browser, then exit.
// The process exits even when closing the browser fails (with exit code 1).
process.on('SIGTERM', async () => {
  console.log('🛑 Shutting down...');
  stopStreaming();
  try {
    await closeBrowser();
  } catch (error) {
    console.error('❌ Browser shutdown error:', error.message);
    process.exitCode = 1;
  } finally {
    // With no argument, exit() uses process.exitCode (0 unless set above).
    process.exit();
  }
});