Spaces:
Paused
Paused
Upload 2 files
Browse files- browser-agent.js +277 -0
- index.js +395 -359
browser-agent.js
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Browser Agent - AI-Powered Browser Automation with Vision
|
| 3 |
+
* Uses Qwen VL to see screenshots and decide actions like a human
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
// Vision Model Configuration
|
| 7 |
+
export const VISION_MODEL = 'qwen/qwen-2.5-vl-7b-instruct:free';
|
| 8 |
+
|
| 9 |
+
/**
 * Collect the OpenRouter API keys configured in the environment.
 *
 * Reads OPENROUTER_API_KEY_1 through OPENROUTER_API_KEY_10 in numeric order,
 * followed by the un-numbered OPENROUTER_API_KEY. Each value is trimmed;
 * blank values are skipped and repeated values are returned only once, so a
 * caller iterating the list never sends an empty or already-tried key.
 *
 * @returns {string[]} Distinct, non-empty keys in lookup order (may be empty).
 */
function getOpenRouterKeys() {
  const variableNames = [
    ...Array.from({ length: 10 }, (_, index) => `OPENROUTER_API_KEY_${index + 1}`),
    'OPENROUTER_API_KEY',
  ];

  // A Set keeps insertion order while dropping duplicate key values.
  const keys = new Set();
  for (const name of variableNames) {
    const value = process.env[name]?.trim();
    if (value) keys.add(value);
  }
  return [...keys];
}
|
| 22 |
+
|
| 23 |
+
/**
 * Ask the vision model which action the agent should take next.
 *
 * Builds a text prompt from the task, the current URL and the history of
 * previous steps, sends it together with the screenshot to OpenRouter's
 * chat-completions endpoint, and returns the first JSON object found in the
 * model's reply. Each configured API key is tried in turn; a key is skipped
 * when it is rate limited (HTTP 429), the API reports an error, the request
 * fails or times out, or the reply cannot be parsed.
 *
 * @param {string} screenshotBase64 - PNG screenshot, base64 encoded (no data: prefix).
 * @param {string} task - Natural-language description of the task.
 * @param {Array<object>} [previousSteps=[]] - Steps taken so far; each may carry
 *   an `action` object with `type` and `description`.
 * @param {string} [currentUrl=''] - URL of the page shown in the screenshot.
 * @returns {Promise<object>} The object parsed from the model's reply, or a
 *   fallback `{ observation, thinking, action: { type: 'wait' }, taskComplete: false }`
 *   when no key is configured or every key failed.
 */
async function analyzeWithVision(screenshotBase64, task, previousSteps = [], currentUrl = '') {
  // Upper bound for a single API call so one stalled request cannot hang the agent.
  const REQUEST_TIMEOUT_MS = 60_000;

  // A recorded step may have no action (the model's reply is stored as-is),
  // so read it defensively instead of throwing while building the history.
  const stepHistory = previousSteps.map((s, i) =>
    `Step ${i + 1}: ${s.action?.type ?? 'unknown'} - ${s.action?.description || ''}`
  ).join('\n');

  const prompt = `You are a browser automation agent. You can see the current webpage screenshot.

TASK: ${task}

CURRENT URL: ${currentUrl}

PREVIOUS STEPS:
${stepHistory || 'None yet'}

Based on what you see in the screenshot, decide the NEXT ACTION to complete the task.

Respond in this exact JSON format:
{
"observation": "Brief description of what you see on the page",
"thinking": "Your reasoning about what to do next",
"action": {
"type": "click" | "type" | "scroll" | "goto" | "wait" | "done",
"x": 500,
"y": 300,
"text": "text to type if action is type",
"url": "url if action is goto",
"direction": "up or down if scroll",
"description": "human readable description of this action"
},
"taskComplete": false,
"result": "Only fill this if taskComplete is true - the final answer/result"
}

IMPORTANT:
- If you see search results with the information needed, extract it and set taskComplete: true
- For click actions, estimate x,y coordinates based on where you see the element
- If you see a search box, type the search query
- If the page needs to scroll to see more, use scroll action
- Be efficient - don't take unnecessary steps`;

  const keys = getOpenRouterKeys();
  if (keys.length === 0) {
    console.error('[BrowserAgent] No OpenRouter API keys found!');
    return {
      observation: 'No API keys configured',
      thinking: 'Cannot analyze without API keys',
      action: { type: 'wait', description: 'Waiting - no API keys' },
      taskComplete: false
    };
  }

  // Try each key until one works
  for (const apiKey of keys) {
    try {
      const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://luks-pied.vercel.app',
          'X-Title': 'Lukas Browser Agent'
        },
        body: JSON.stringify({
          model: VISION_MODEL,
          messages: [{
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              {
                type: 'image_url',
                image_url: {
                  url: `data:image/png;base64,${screenshotBase64}`
                }
              }
            ]
          }],
          max_tokens: 1000
        }),
        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
      });

      if (response.status === 429) {
        console.log('[BrowserAgent] Rate limited, trying next key...');
        continue;
      }

      const data = await response.json();
      const content = data.choices?.[0]?.message?.content;

      if (typeof content === 'string' && content) {
        // Extract JSON from response (handle markdown code blocks): take the
        // span from the first "{" to the last "}".
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
          // A malformed object throws here and is reported by the catch below.
          return JSON.parse(jsonMatch[0]);
        }
        console.log('[BrowserAgent] Model reply contained no JSON object, trying next key...');
      }

      if (data.error) {
        console.log(`[BrowserAgent] API error: ${data.error.message}, trying next key...`);
        continue;
      }
    } catch (error) {
      // Network failure, timeout, non-JSON body or unparsable model output.
      console.log(`[BrowserAgent] Key failed: ${error.message}, trying next...`);
      continue;
    }
  }

  console.error('[BrowserAgent] All API keys failed');
  return {
    observation: 'Error analyzing screenshot',
    thinking: 'All API keys failed',
    action: { type: 'wait', description: 'Waiting due to error' },
    taskComplete: false
  };
}
|
| 140 |
+
|
| 141 |
+
/**
 * Convert a model-supplied coordinate into a finite number.
 *
 * @param {unknown} value - Number or numeric string.
 * @returns {number|null} The coordinate, or null when it is missing or not numeric.
 */
function toCoordinate(value) {
  if (typeof value === 'number') return Number.isFinite(value) ? value : null;
  if (typeof value === 'string' && value.trim() !== '') {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}

/**
 * Execute a single browser action on the given page.
 *
 * Supported `action.type` values:
 *  - `click`  : click at (x, y), then pause 1000 ms.
 *  - `type`   : optionally click at (x, y) to focus, type `text`, press Enter,
 *               then pause 2000 ms.
 *  - `scroll` : wheel-scroll 400 px up (`direction === 'up'`) or down.
 *  - `goto`   : navigate to `url`, then pause 2000 ms.
 *  - `wait`   : pause 2000 ms.
 *  - `done`   : no-op.
 * Any other type is logged and ignored.
 *
 * @param {object} page - Page-like object exposing `mouse`, `keyboard`, `goto`
 *   and `waitForTimeout` (a Playwright page in this project).
 * @param {object} action - Action description produced by the vision model.
 * @returns {Promise<boolean>} `true` when the action ran (or was ignored),
 *   `false` when it was invalid or threw; errors are logged, never rethrown.
 */
async function executeAction(page, action) {
  try {
    const x = toCoordinate(action.x);
    const y = toCoordinate(action.y);
    // Compare against null rather than truthiness: 0 is a valid coordinate.
    const hasPoint = x !== null && y !== null;

    switch (action.type) {
      case 'click': {
        if (!hasPoint) {
          throw new Error('click action requires numeric x and y coordinates');
        }
        await page.mouse.click(x, y);
        await page.waitForTimeout(1000);
        break;
      }

      case 'type': {
        if (hasPoint) {
          // Focus the target field before typing.
          await page.mouse.click(x, y);
          await page.waitForTimeout(300);
        }
        await page.keyboard.type(action.text, { delay: 50 });
        await page.keyboard.press('Enter');
        await page.waitForTimeout(2000);
        break;
      }

      case 'scroll': {
        const amount = action.direction === 'up' ? -400 : 400;
        await page.mouse.wheel(0, amount);
        await page.waitForTimeout(500);
        break;
      }

      case 'goto': {
        await page.goto(action.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await page.waitForTimeout(2000);
        break;
      }

      case 'wait': {
        await page.waitForTimeout(2000);
        break;
      }

      case 'done':
        // Task is complete, no action needed
        break;

      default:
        console.log('[BrowserAgent] Unknown action:', action.type);
    }
    return true;
  } catch (error) {
    console.error('[BrowserAgent] Action execution error:', error.message);
    return false;
  }
}
|
| 190 |
+
|
| 191 |
+
/**
 * Run the Browser Agent loop: screenshot -> vision analysis -> action.
 *
 * Navigates to Google, then repeats up to `maxSteps` times: capture a PNG
 * screenshot, ask `analyzeWithVision` for the next action, record the step,
 * emit an `agent:step` event on `socket` (when provided) and execute the
 * action. The loop stops as soon as the model reports the task complete,
 * either through `taskComplete` or by answering with a `done` action.
 *
 * @param {object} page - Playwright page the agent drives.
 * @param {string} task - Natural-language task description.
 * @param {object} [socket] - Object with an `emit(event, payload)` method;
 *   pass a falsy value to disable progress events.
 * @param {number} [maxSteps=10] - Maximum number of analysis/action rounds.
 * @returns {Promise<{success: boolean, steps: object[], result: *, finalScreenshot: string, totalSteps: number}>}
 *   `finalScreenshot` and each step's `screenshot` are base64 PNGs.
 */
async function runBrowserAgent(page, task, socket, maxSteps = 10) {
  console.log(`[BrowserAgent] Starting task: "${task}"`);

  const steps = [];
  let taskComplete = false;
  let finalResult = null;

  // Start by going to Google
  await page.goto('https://www.google.com', { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(1000);

  for (let i = 0; i < maxSteps; i++) {
    const stepNumber = i + 1;
    console.log(`[BrowserAgent] Step ${stepNumber}/${maxSteps}`);

    // 1. Take screenshot
    const screenshotBuffer = await page.screenshot({ type: 'png' });
    const screenshotBase64 = screenshotBuffer.toString('base64');

    // 2. Get current URL
    const currentUrl = page.url();

    // 3. Analyze with Vision AI
    console.log('[BrowserAgent] Analyzing screenshot with Vision AI...');
    const analysis = (await analyzeWithVision(screenshotBase64, task, steps, currentUrl)) ?? {};
    const { observation, thinking, action } = analysis;

    // The model may signal completion with the flag (boolean or the string
    // "true") or with a "done" action; without the latter check a bare "done"
    // would burn the remaining steps re-analysing the same page.
    const finished =
      analysis.taskComplete === true ||
      analysis.taskComplete === 'true' ||
      action?.type === 'done';

    console.log(`[BrowserAgent] Observation: ${observation}`);
    console.log(`[BrowserAgent] Thinking: ${thinking}`);
    console.log(`[BrowserAgent] Action: ${action?.type} - ${action?.description}`);

    // 4. Record step
    steps.push({
      stepNumber,
      screenshot: screenshotBase64,
      observation,
      thinking,
      action,
      timestamp: Date.now()
    });

    // 5. Send update to frontend
    if (socket) {
      socket.emit('agent:step', {
        step: stepNumber,
        total: maxSteps,
        screenshot: screenshotBase64,
        observation,
        action: action?.description || action?.type,
        taskComplete: finished
      });
    }

    // 6. Check if task is complete
    if (finished) {
      taskComplete = true;
      finalResult = analysis.result ?? null;
      console.log('[BrowserAgent] Task completed!');
      console.log('[BrowserAgent] Result:', finalResult);
      break;
    }

    // 7. Execute the action (executeAction logs its own failures)
    if (action) {
      await executeAction(page, action);
    }
  }

  // Take final screenshot
  const finalScreenshot = await page.screenshot({ type: 'png' });

  return {
    success: taskComplete,
    steps,
    result: finalResult,
    finalScreenshot: finalScreenshot.toString('base64'),
    totalSteps: steps.length
  };
}
|
| 272 |
+
|
| 273 |
+
export {
|
| 274 |
+
runBrowserAgent,
|
| 275 |
+
analyzeWithVision,
|
| 276 |
+
executeAction
|
| 277 |
+
};
|
index.js
CHANGED
|
@@ -1,359 +1,395 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Lukas Worker - The Muscles
|
| 3 |
-
* Browser automation server with Socket.io for real-time control and streaming
|
| 4 |
-
* Deploy this to Hugging Face Spaces as a Docker container
|
| 5 |
-
*/
|
| 6 |
-
|
| 7 |
-
import express from 'express';
|
| 8 |
-
import { createServer } from 'http';
|
| 9 |
-
import { Server } from 'socket.io';
|
| 10 |
-
import { chromium } from 'playwright';
|
| 11 |
-
import dotenv from 'dotenv';
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
const
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
const
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
});
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
});
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
//
|
| 47 |
-
//
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
let
|
| 51 |
-
let
|
| 52 |
-
let
|
| 53 |
-
let
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
'--
|
| 64 |
-
'--disable-
|
| 65 |
-
'--disable-
|
| 66 |
-
'--
|
| 67 |
-
'--no-
|
| 68 |
-
'--
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
}
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
//
|
| 93 |
-
//
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
if (
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
}
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
streamInterval
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
}
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
//
|
| 129 |
-
//
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
}
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
socket.
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
//
|
| 163 |
-
//
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
const
|
| 260 |
-
const
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
//
|
| 329 |
-
//
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Lukas Worker - The Muscles
|
| 3 |
+
* Browser automation server with Socket.io for real-time control and streaming
|
| 4 |
+
* Deploy this to Hugging Face Spaces as a Docker container
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
import { createHash, timingSafeEqual } from 'node:crypto';
import { createServer } from 'http';

import dotenv from 'dotenv';
import express from 'express';
import { chromium } from 'playwright';
import { Server } from 'socket.io';

import { runBrowserAgent } from './browser-agent.js';
|
| 13 |
+
|
| 14 |
+
dotenv.config();
|
| 15 |
+
|
| 16 |
+
const PORT = process.env.PORT || 7860;
|
| 17 |
+
const WORKER_SECRET = process.env.WORKER_SECRET || 'lukas-dev-secret';
|
| 18 |
+
|
| 19 |
+
const app = express();
|
| 20 |
+
const httpServer = createServer(app);
|
| 21 |
+
|
| 22 |
+
// Socket.io server with CORS for Vercel
|
| 23 |
+
const io = new Server(httpServer, {
|
| 24 |
+
cors: {
|
| 25 |
+
origin: ['https://luks-pied.vercel.app', 'http://localhost:5173', 'http://localhost:3000'],
|
| 26 |
+
methods: ['GET', 'POST'],
|
| 27 |
+
credentials: true
|
| 28 |
+
},
|
| 29 |
+
transports: ['websocket', 'polling']
|
| 30 |
+
});
|
| 31 |
+
|
| 32 |
+
// Health check endpoint (Required for Hugging Face)
|
| 33 |
+
app.get('/', (req, res) => {
|
| 34 |
+
res.json({
|
| 35 |
+
status: 'ok',
|
| 36 |
+
service: 'Lukas Worker (The Muscles)',
|
| 37 |
+
version: '1.0.0',
|
| 38 |
+
ready: true
|
| 39 |
+
});
|
| 40 |
+
});
|
| 41 |
+
|
| 42 |
+
app.get('/health', (req, res) => {
|
| 43 |
+
res.json({ status: 'healthy', timestamp: new Date().toISOString() });
|
| 44 |
+
});
|
| 45 |
+
|
| 46 |
+
// =============================================================================
|
| 47 |
+
// BROWSER MANAGEMENT
|
| 48 |
+
// =============================================================================
|
| 49 |
+
|
| 50 |
+
let browser = null;
|
| 51 |
+
let browserContext = null;
|
| 52 |
+
let activePage = null;
|
| 53 |
+
let streamInterval = null;
|
| 54 |
+
let connectedClient = null;
|
| 55 |
+
|
| 56 |
+
// In-flight launch shared by concurrent initBrowser() callers.
let pendingBrowserInit = null;

/**
 * Launch headless Chromium and prepare the shared context and page.
 *
 * Populates the module-level `browser`, `browserContext` and `activePage`.
 * Does nothing when a browser and page already exist. Concurrent callers
 * share a single launch, and a launch that fails part-way closes the
 * half-started browser so the next call can retry from scratch.
 *
 * @returns {Promise<void>} Resolves once `activePage` is ready; rejects with
 *   the launch error otherwise.
 */
async function initBrowser() {
  if (browser && activePage) return;
  if (pendingBrowserInit) return pendingBrowserInit;

  pendingBrowserInit = (async () => {
    if (browser) {
      // A browser without a usable page is stale: discard it (best effort)
      // before launching a fresh one so it is not leaked.
      await browser.close().catch(() => {});
      browser = null;
      browserContext = null;
      activePage = null;
    }

    console.log('🚀 Launching browser...');
    const launched = await chromium.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ]
    });

    try {
      const context = await launched.newContext({
        viewport: { width: 1280, height: 720 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      });
      const page = await context.newPage();

      // Publish the handles only once everything succeeded, so other code
      // never observes a browser without a page.
      browser = launched;
      browserContext = context;
      activePage = page;
    } catch (error) {
      // Best-effort cleanup; the original error is the one worth reporting.
      await launched.close().catch(() => {});
      throw error;
    }

    console.log('✅ Browser ready');
  })();

  try {
    await pendingBrowserInit;
  } finally {
    pendingBrowserInit = null;
  }
}
|
| 81 |
+
|
| 82 |
+
/**
 * Close the shared browser, if one is running.
 *
 * The module-level `browser`, `browserContext` and `activePage` handles are
 * cleared before the browser is closed, so they are reset even when
 * `close()` rejects and no other code keeps using a page that is going away.
 *
 * @returns {Promise<void>} Rejects with the error from `browser.close()`, if any.
 */
async function closeBrowser() {
  if (!browser) return;

  const closing = browser;
  browser = null;
  browserContext = null;
  activePage = null;

  await closing.close();
  console.log('🔴 Browser closed');
}
|
| 91 |
+
|
| 92 |
+
// =============================================================================
|
| 93 |
+
// STREAMING
|
| 94 |
+
// =============================================================================
|
| 95 |
+
|
| 96 |
+
/**
 * Start pushing live JPEG frames of the active page to a client.
 *
 * Replaces any running stream. Every 200 ms a viewport screenshot is taken
 * and emitted as `stream:frame` with a base64 `image`. A tick is skipped
 * while the previous capture is still in progress, so slow screenshots do
 * not pile up.
 *
 * @param {object} socket - Socket.io socket that receives the frames.
 * @returns {Promise<void>}
 */
async function startStreaming(socket) {
  if (streamInterval) {
    clearInterval(streamInterval);
    streamInterval = null;
  }
  if (!activePage) return;

  console.log('📺 Starting live stream...');

  let capturing = false;

  streamInterval = setInterval(async () => {
    if (capturing || !activePage) return;
    capturing = true;

    try {
      const screenshot = await activePage.screenshot({
        type: 'jpeg',
        quality: 60,
        fullPage: false
      });

      socket.emit('stream:frame', { image: screenshot.toString('base64') });
    } catch (error) {
      // Deliberately ignored: the page might be navigating or closing, and
      // the next tick simply tries again.
    } finally {
      capturing = false;
    }
  }, 200); // ~5 FPS for smooth streaming
}
|
| 119 |
+
|
| 120 |
+
function stopStreaming() {
|
| 121 |
+
if (streamInterval) {
|
| 122 |
+
clearInterval(streamInterval);
|
| 123 |
+
streamInterval = null;
|
| 124 |
+
console.log('πΊ Stream stopped');
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
// =============================================================================
|
| 129 |
+
// SOCKET HANDLERS
|
| 130 |
+
// =============================================================================
|
| 131 |
+
|
| 132 |
+
/**
 * Check a handshake token against WORKER_SECRET in constant time.
 *
 * Both values are hashed first so the compared buffers always have the same
 * length, which `timingSafeEqual` requires.
 *
 * @param {unknown} token - Value supplied by the client in `handshake.auth.token`.
 * @returns {boolean} `true` only for a string equal to WORKER_SECRET.
 */
function isValidWorkerToken(token) {
  if (typeof token !== 'string') return false;

  const digest = (value) => createHash('sha256').update(value).digest();
  return timingSafeEqual(digest(token), digest(WORKER_SECRET));
}

// Reject every connection whose handshake does not carry the shared secret.
io.use((socket, next) => {
  const token = socket.handshake.auth?.token;

  if (isValidWorkerToken(token)) {
    console.log('✅ Client authenticated');
    next();
  } else {
    console.log('❌ Authentication failed');
    next(new Error('Authentication failed'));
  }
});
|
| 143 |
+
|
| 144 |
+
/**
 * Normalise the arguments Socket.io passes to a command listener.
 *
 * A client may emit with a payload and an acknowledgement, with only an
 * acknowledgement (which then arrives as the first argument), or with
 * neither. The result always has an object payload and a callable ack.
 *
 * @param {unknown} data - First listener argument.
 * @param {unknown} callback - Second listener argument.
 * @returns {{payload: object, ack: Function}}
 */
function normalizeCommandArgs(data, callback) {
  if (typeof data === 'function') {
    return { payload: {}, ack: data };
  }
  return {
    payload: data ?? {},
    ack: typeof callback === 'function' ? callback : () => {}
  };
}

/**
 * Per-client setup: enforce the single-client policy, launch the browser,
 * register the `browser:*` command handlers and start the live stream.
 *
 * Every command acknowledges with `{ success: true, ... }` on success or
 * `{ success: false, error }` on failure.
 */
io.on('connection', async (socket) => {
  console.log('🔌 Client connected:', socket.id);

  // Only allow one client at a time
  if (connectedClient && connectedClient !== socket.id) {
    socket.emit('error', { message: 'Another client is already connected' });
    socket.disconnect();
    return;
  }

  connectedClient = socket.id;

  // =========================================================================
  // DISCONNECT HANDLER
  // =========================================================================

  // Registered before any await so a client that leaves while the browser is
  // still launching cannot keep the single-client slot locked.
  socket.on('disconnect', () => {
    console.log('🔌 Client disconnected:', socket.id);
    stopStreaming();
    if (connectedClient === socket.id) {
      connectedClient = null;
    }

    // The browser is intentionally left running, warm for reconnection.
  });

  // Initialize browser on first connection. The launch runs in the background
  // while the handlers below are registered, so commands sent straight after
  // connecting wait for the browser instead of being dropped.
  const browserReady = initBrowser();

  // Resolve the page a command should act on, waiting for the launch first.
  const getPage = async () => {
    await browserReady;
    if (!activePage) {
      throw new Error('Browser page is not available');
    }
    return activePage;
  };

  // Register a command whose result object is merged into the success ack.
  // `errorLabel`, when given, is logged together with the failure message.
  const onCommand = (event, errorLabel, run) => {
    socket.on(event, async (data, callback) => {
      const { payload, ack } = normalizeCommandArgs(data, callback);
      try {
        const result = await run(payload);
        ack({ success: true, ...result });
      } catch (error) {
        if (errorLabel) {
          console.error(`❌ ${errorLabel}:`, error.message);
        }
        ack({ success: false, error: error.message });
      }
    });
  };

  // =========================================================================
  // COMMAND HANDLERS
  // =========================================================================

  onCommand('browser:goto', 'Navigation error', async ({ url }) => {
    console.log(`🌐 Navigating to: ${url}`);

    const page = await getPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });

    return { title: await page.title() };
  });

  onCommand('browser:click', 'Click error', async ({ selector, x, y }) => {
    const page = await getPage();

    if (x !== undefined && y !== undefined) {
      // Click by coordinates
      console.log(`🖱️ Clicking at coordinates: (${x}, ${y})`);
      await page.mouse.click(x, y);
    } else if (selector) {
      // Click by selector
      console.log(`🖱️ Clicking selector: ${selector}`);
      await page.click(selector, { timeout: 10000 });
    } else {
      throw new Error('Either selector or x,y coordinates required');
    }
  });

  onCommand('browser:type', 'Type error', async ({ selector, text }) => {
    if (typeof text !== 'string') {
      throw new Error('text is required');
    }
    const page = await getPage();

    if (selector) {
      // Type into specific element
      console.log(`⌨️ Typing in selector: ${selector}`);
      await page.fill(selector, text);
    } else {
      // Type using keyboard (to focused element)
      console.log(`⌨️ Typing text: ${text.substring(0, 20)}...`);
      await page.keyboard.type(text, { delay: 30 });
    }
  });

  onCommand('browser:scroll', null, async ({ direction = 'down', amount = 500 }) => {
    console.log(`📜 Scrolling ${direction}`);

    const page = await getPage();
    // Playwright's evaluate() accepts a single argument, so both values
    // travel together in one object.
    await page.evaluate(({ dir, amt }) => {
      window.scrollBy(0, dir === 'down' ? amt : -amt);
    }, { dir: direction, amt: amount });
  });

  onCommand('browser:screenshot', null, async ({ fullPage }) => {
    console.log('📸 Taking screenshot...');

    const page = await getPage();
    const screenshot = await page.screenshot({
      type: 'png',
      fullPage: Boolean(fullPage)
    });

    return { image: screenshot.toString('base64') };
  });

  onCommand('browser:getContent', null, async () => {
    console.log('📄 Getting page content...');

    const page = await getPage();
    const content = await page.content();
    const title = await page.title();
    const url = page.url();

    // Get text content for AI analysis (first 10000 characters)
    const textContent = await page.evaluate(() => {
      return document.body.innerText.substring(0, 10000);
    });

    return { content, title, url, textContent };
  });

  onCommand('browser:getAccessibility', null, async () => {
    console.log('🌳 Getting accessibility tree...');

    const page = await getPage();
    return { tree: await page.accessibility.snapshot() };
  });

  onCommand('browser:execute', null, async ({ action, params = {} }) => {
    console.log(`⚡ Executing action: ${action}`);

    const page = await getPage();
    let result = null;

    switch (action) {
      case 'waitForSelector':
        await page.waitForSelector(params.selector, { timeout: params.timeout || 10000 });
        result = { found: true };
        break;

      case 'pressKey':
        await page.keyboard.press(params.key);
        result = { pressed: params.key };
        break;

      case 'goBack':
        await page.goBack();
        result = { navigated: true };
        break;

      case 'goForward':
        await page.goForward();
        result = { navigated: true };
        break;

      case 'reload':
        await page.reload();
        result = { reloaded: true };
        break;

      default:
        throw new Error(`Unknown action: ${action}`);
    }

    return { result };
  });

  // =========================================================================
  // BROWSER AGENT (AI-POWERED)
  // =========================================================================

  onCommand('browser:agent', '[Agent] Error', async ({ task, maxSteps = 10 }) => {
    console.log('🤖 [Agent] Starting AI Browser Agent...');
    console.log(`🎯 [Agent] Task: "${task}"`);

    // Wait for the initial launch; if it failed, retry once right here.
    await browserReady.catch(() => {});
    if (!activePage) {
      await initBrowser();
    }

    // Run the browser agent with Vision AI
    const result = await runBrowserAgent(activePage, task, socket, maxSteps);

    console.log(`✅ [Agent] Completed in ${result.totalSteps} steps`);
    return {
      // Overrides the default `success: true` with the agent's own verdict.
      success: result.success,
      result: result.result,
      // Per-step screenshots are dropped to keep the acknowledgement small.
      steps: result.steps.map((s) => ({
        stepNumber: s.stepNumber,
        observation: s.observation,
        action: s.action?.description || s.action?.type
      })),
      finalScreenshot: result.finalScreenshot,
      totalSteps: result.totalSteps
    };
  });

  // Start streaming automatically once the browser is up.
  try {
    await browserReady;
    if (socket.connected) {
      await startStreaming(socket);
    }
  } catch (error) {
    console.error('❌ Browser launch error:', error.message);
    socket.emit('error', { message: `Browser failed to start: ${error.message}` });
  }
});
|
| 376 |
+
|
| 377 |
+
// =============================================================================
|
| 378 |
+
// START SERVER
|
| 379 |
+
// =============================================================================
|
| 380 |
+
|
| 381 |
+
httpServer.listen(PORT, '0.0.0.0', () => {
|
| 382 |
+
console.log('βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ');
|
| 383 |
+
console.log(` π¦Ύ Lukas Worker (The Muscles) is running`);
|
| 384 |
+
console.log(` π‘ Socket.io server: http://0.0.0.0:${PORT}`);
|
| 385 |
+
console.log(` π Secret required for connection`);
|
| 386 |
+
console.log('βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ');
|
| 387 |
+
});
|
| 388 |
+
|
| 389 |
+
// Graceful shutdown: stop streaming, close the browser, then exit. The exit
// is guaranteed even when closing the browser fails, in which case the
// process exits with status 1.
process.on('SIGTERM', async () => {
  console.log('🛑 Shutting down...');
  stopStreaming();

  let exitCode = 0;
  try {
    await closeBrowser();
  } catch (error) {
    console.error('❌ Error while closing browser:', error.message);
    exitCode = 1;
  } finally {
    process.exit(exitCode);
  }
});
|