ncolex committed on
Commit
c5f9050
·
verified ·
1 Parent(s): c66a865

Upload backend folder

Browse files
backend/agent.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, json, base64, re
2
+ from pathlib import Path
3
+ from typing import Literal
4
+ from backend.smart_browser_controller import SmartBrowserController
5
+ from backend.vision_model import decide
6
+ from backend.universal_extractor import UniversalExtractor
7
+
8
def detect_format_from_prompt(prompt: str, default_fmt: str) -> str:
    """Scan the prompt for an explicit output-format request; fall back to the default."""
    lowered = prompt.lower()

    # Ordered mapping of format name -> regex cues that signal the user wants it.
    cues = {
        'pdf': [r'\bpdf\b', r'pdf format', r'save.*pdf', r'as pdf', r'to pdf'],
        'csv': [r'\bcsv\b', r'csv format', r'save.*csv', r'as csv', r'to csv'],
        'json': [r'\bjson\b', r'json format', r'save.*json', r'as json', r'to json'],
        'html': [r'\bhtml\b', r'html format', r'save.*html', r'as html', r'to html'],
        'md': [r'\bmarkdown\b', r'md format', r'save.*markdown', r'as markdown', r'to md'],
        'txt': [r'\btext\b', r'txt format', r'save.*text', r'as text', r'to txt', r'plain text']
    }

    # First format whose cue matches wins (dict order above is the priority).
    for candidate, patterns in cues.items():
        if any(re.search(pattern, lowered) for pattern in patterns):
            print(f"🎯 Detected format '{candidate}' from prompt")
            return candidate

    print(f"📋 No specific format detected, using default: {default_fmt}")
    return default_fmt
31
+
32
def get_file_extension(fmt: str) -> str:
    """Return the file extension for a known format, or 'output' as a fallback."""
    # Every supported format maps to an extension identical to its own name.
    known_formats = ('txt', 'md', 'json', 'html', 'csv', 'pdf')
    return fmt if fmt in known_formats else 'output'
43
+
44
def get_content_type(fmt: str) -> str:
    """Return the MIME type used when serving a file of the given format."""
    # Unknown formats are served as opaque binary data.
    return {
        'txt': 'text/plain',
        'md': 'text/markdown',
        'json': 'application/json',
        'html': 'text/html',
        'csv': 'text/csv',
        'pdf': 'application/pdf',
    }.get(fmt, 'application/octet-stream')
55
+
56
async def run_agent(job_id: str, prompt: str, fmt: Literal["txt","md","json","html","csv","pdf"],
                    headless: bool, proxy: dict | None, enable_streaming: bool = False):
    """Enhanced agent with smart proxy rotation and vision-based anti-bot detection.

    Drives a browser toward the user's goal in a perceive -> decide -> act loop:
    each step snapshots page state plus a screenshot, asks the vision model for
    the next action, executes it, and finally extracts content in `fmt` and
    writes it to OUTPUT_DIR/<job_id>.<ext>.

    Args:
        job_id: Identifier used for websocket broadcasts, stored job info, and
            the output filename.
        prompt: Natural-language goal; may embed a start URL and/or a format word.
        fmt: Default output format; overridden when the prompt names a format.
        headless: Run the browser without a visible window.
        proxy: Optional proxy configuration forwarded to SmartBrowserController.
        enable_streaming: Register a live screen-streaming session for this job.
    """
    # Imported here (not at module top) — presumably to avoid a circular import
    # with backend.main; confirm before moving.
    from backend.main import broadcast, OUTPUT_DIR, register_streaming_session, store_job_info

    print(f"🚀 Starting smart agent with vision-based anti-bot detection")
    print(f"📋 Goal: {prompt}")
    print(f"🌐 Default Format: {fmt}")

    # Smart format detection from prompt: a format word in the prompt beats the
    # caller-supplied default.
    detected_fmt = detect_format_from_prompt(prompt, fmt)
    if detected_fmt != fmt:
        print(f"🔄 Format overridden: {fmt} → {detected_fmt}")
        fmt = detected_fmt

    # Initialize universal extractor
    extractor = UniversalExtractor()

    # Use SmartBrowserController instead of regular BrowserController
    async with SmartBrowserController(headless, proxy, enable_streaming) as browser:

        # Register streaming session
        if enable_streaming:
            await register_streaming_session(job_id, browser)

        # Store job info for later download
        await store_job_info(job_id, {
            "format": fmt,
            "content_type": get_content_type(fmt),
            "extension": get_file_extension(fmt),
            "prompt": prompt
        })

        # Show initial proxy stats
        proxy_stats = browser.get_proxy_stats()
        print(f"📊 Initial proxy stats: {proxy_stats}")
        await broadcast(job_id, {
            "type": "proxy_stats",
            "stats": proxy_stats
        })

        # Smart navigation to starting URL: prefer a URL embedded in the prompt,
        # otherwise pick a landing page from the goal's keywords.
        url_match = re.search(r"https?://[\w\-\.]+[^\s]*", prompt)
        if url_match:
            # Strip trailing punctuation that often clings to URLs in prose.
            start_url = url_match.group(0).rstrip('".,;')
            print(f"🔗 Found URL in prompt: {start_url}")
        else:
            start_url = determine_starting_url(prompt)
            # start_url = 'www.google.com'
            print(f"🔗 Starting at: {start_url}")

        try:
            # This now uses smart navigation with anti-bot detection and proxy rotation
            await browser.goto(start_url)
            print("✅ Successfully navigated with smart proxy rotation")
        except Exception as e:
            # Initial navigation is fatal for the job: report and bail out.
            print(f"❌ Smart navigation failed: {e}")
            await broadcast(job_id, {
                "type": "error",
                "message": f"Navigation failed: {str(e)}",
                "proxy_stats": browser.get_proxy_stats()
            })
            return

        await broadcast(job_id, {
            "status": "started",
            "initial_url": browser.page.url,
            "detected_format": fmt,
            "file_extension": get_file_extension(fmt),
            "proxy_stats": browser.get_proxy_stats()
        })

        # Dynamic limits based on task complexity
        max_steps = determine_max_steps(prompt)
        consecutive_scrolls = 0        # scrolls since the last non-scroll action
        max_consecutive_scrolls = 3
        extraction_attempts = 0        # how many times "extract" has been tried
        max_extraction_attempts = 2

        print(f"🎯 Running for max {max_steps} steps, output format: {fmt}")

        # Main enhanced agent loop with smart proxy rotation
        for step in range(max_steps):
            print(f"\n🔄 Step {step + 1}/{max_steps}")

            # Periodically check proxy health and broadcast stats (every 5 steps,
            # including step 0).
            if step % 5 == 0:
                proxy_stats = browser.get_proxy_stats()
                await broadcast(job_id, {
                    "type": "proxy_stats",
                    "stats": proxy_stats,
                    "step": step
                })
                print(f"📊 Proxy health check: {proxy_stats['available']}/{proxy_stats['total']} available")

            try:
                # Snapshot DOM + screenshot; the screenshot feeds the vision model.
                page_state = await browser.get_page_state(include_screenshot=True)
                print(f"📊 Found {len(page_state.selector_map)} interactive elements")
                print(f"📍 Current: {page_state.url}")

                await broadcast(job_id, {
                    "type": "page_info",
                    "step": step + 1,
                    "url": page_state.url,
                    "title": page_state.title,
                    "interactive_elements": len(page_state.selector_map),
                    "format": fmt
                })

                if page_state.screenshot:
                    await broadcast(job_id, {
                        "type": "screenshot",
                        "screenshot": page_state.screenshot
                    })

            except Exception as e:
                # A failed snapshot just skips this step; the loop retries.
                print(f"❌ Page state failed: {e}")
                continue

            # Handle empty pages: scroll a few times looking for content, then
            # give up and fall through to the final extraction.
            if len(page_state.selector_map) == 0:
                if consecutive_scrolls < max_consecutive_scrolls:
                    print("⚠️ No interactive elements, trying to scroll...")
                    await browser.scroll_page("down", 400)
                    consecutive_scrolls += 1
                    continue
                else:
                    print("⚠️ No elements found after scrolling")
                    break

            # AI decision making
            try:
                screenshot_bytes = base64.b64decode(page_state.screenshot)
                decision = await decide(screenshot_bytes, page_state, prompt)

                print(f"🤖 AI Decision: {decision.get('action')} - {decision.get('reason', 'No reason')}")

                await broadcast(job_id, {
                    "type": "decision",
                    "step": step + 1,
                    "decision": decision
                })

            except Exception as e:
                print(f"❌ AI decision failed: {e}")
                continue

            # Execute action with enhanced error handling
            action = decision.get("action")
            print(f"⚡ Executing: {action}")

            try:
                if action == "click":
                    # Index must resolve to a known element in this snapshot.
                    index = decision.get("index")
                    if index is not None and index in page_state.selector_map:
                        elem = page_state.selector_map[index]
                        print(f"🖱️ Clicking: {elem.text[:50]}...")
                        await browser.click_element_by_index(index, page_state)
                        consecutive_scrolls = 0
                        extraction_attempts = 0  # Reset on navigation
                        await asyncio.sleep(2)
                    else:
                        print(f"❌ Invalid click index: {index}")

                elif action == "type":
                    index = decision.get("index")
                    text = decision.get("text", "")
                    if index is not None and index in page_state.selector_map and text:
                        elem = page_state.selector_map[index]
                        print(f"⌨️ Typing '{text}' into: {elem.text[:30]}...")
                        await browser.input_text_by_index(index, text, page_state)
                        consecutive_scrolls = 0
                        await asyncio.sleep(1)
                    else:
                        print(f"❌ Invalid type parameters: index={index}, text='{text}'")

                elif action == "scroll":
                    direction = decision.get("direction", "down")
                    amount = decision.get("amount", 400)
                    print(f"📜 Scrolling {direction} by {amount}px")
                    await browser.scroll_page(direction, amount)
                    consecutive_scrolls += 1

                    # Break out of scroll loops by jumping straight to page end.
                    if consecutive_scrolls >= max_consecutive_scrolls:
                        print("⚠️ Too many scrolls, trying page end")
                        await browser.press_key("End")
                        consecutive_scrolls = 0

                elif action == "press_key":
                    key = decision.get("key", "Enter")
                    print(f"🔑 Pressing key: {key}")
                    await browser.press_key(key)
                    consecutive_scrolls = 0
                    await asyncio.sleep(2)

                elif action == "navigate":
                    url = decision.get("url", "")
                    if url and url.startswith("http"):
                        print(f"🔗 Navigating to: {url}")
                        # This will use smart navigation with anti-bot detection
                        try:
                            await browser.goto(url)
                            consecutive_scrolls = 0
                            extraction_attempts = 0
                            await asyncio.sleep(2)
                        except Exception as nav_error:
                            print(f"❌ Smart navigation failed: {nav_error}")
                            # Broadcast navigation failure with proxy stats
                            await broadcast(job_id, {
                                "type": "navigation_error",
                                "url": url,
                                "error": str(nav_error),
                                "proxy_stats": browser.get_proxy_stats()
                            })
                    else:
                        print(f"❌ Invalid navigation URL: {url}")

                elif action == "extract":
                    extraction_attempts += 1
                    if extraction_attempts <= max_extraction_attempts:
                        print(f"🔍 Starting intelligent extraction in {fmt} format...")
                        await broadcast(job_id, {
                            "type": "extraction",
                            "status": "starting",
                            "attempt": extraction_attempts,
                            "format": fmt
                        })

                        # Use universal extraction with specified format
                        content_result = await extractor.extract_intelligent_content(browser, prompt, fmt, job_id)

                        # Save content with proper extension
                        file_extension = get_file_extension(fmt)
                        output_file = OUTPUT_DIR / f"{job_id}.{file_extension}"

                        # Handle different content types
                        saved_successfully = await save_content(content_result, output_file, fmt, job_id)

                        if saved_successfully:
                            print(f"💾 Content saved successfully: {output_file}")
                            await broadcast(job_id, {
                                "type": "extraction",
                                "status": "completed",
                                "format": fmt,
                                "file_path": str(output_file),
                                "file_extension": file_extension,
                                "proxy_stats": browser.get_proxy_stats()
                            })
                        else:
                            print(f"❌ Failed to save content")

                        # Extraction (successful or not) ends the action loop.
                        break
                    else:
                        print("⚠️ Maximum extraction attempts reached")
                        break

                elif action == "done":
                    print("✅ Task marked as complete by AI")
                    break

                else:
                    print(f"⚠️ Unknown action: {action}")

            except Exception as e:
                # Action failures are non-fatal: pause briefly and take the
                # next step with a fresh page snapshot.
                print(f"❌ Action execution failed: {e}")
                await asyncio.sleep(1)

            # Small delay between actions
            await asyncio.sleep(0.5)

        # Final extraction if not done yet (loop ended without an "extract").
        if extraction_attempts == 0:
            print(f"🔍 Performing final extraction in {fmt} format...")
            try:
                content_result = await extractor.extract_intelligent_content(browser, prompt, fmt, job_id)

                file_extension = get_file_extension(fmt)
                output_file = OUTPUT_DIR / f"{job_id}.{file_extension}"

                await save_content(content_result, output_file, fmt, job_id)
                print(f"💾 Final content saved: {output_file}")
            except Exception as e:
                print(f"❌ Final extraction failed: {e}")

        # Final proxy statistics
        final_proxy_stats = browser.get_proxy_stats()
        print(f"📊 Final proxy stats: {final_proxy_stats}")

        await broadcast(job_id, {
            "status": "finished",
            "final_format": fmt,
            "final_proxy_stats": final_proxy_stats
        })
349
+
350
async def save_content(content_result: str, output_file: Path, fmt: str, job_id: str) -> bool:
    """Persist extracted content to disk according to the requested format.

    Args:
        content_result: Extracted text, or — for PDF — a status marker string:
            "PDF_DIRECT_SAVE:<path>" (already saved in place) or
            "PDF saved to: <path>" (legacy; must be copied to output_file).
        output_file: Target path, already carrying the correct extension.
        fmt: Output format name; "pdf" gets special handling, everything else
            is written verbatim as UTF-8 text.
        job_id: Job identifier (unused here; kept for interface compatibility).

    Returns:
        True when the content was saved (or verified present on disk),
        False on any failure.
    """
    try:
        if fmt == "pdf":
            if content_result.startswith("PDF_DIRECT_SAVE:"):
                # PDF was saved directly to the correct location.
                pdf_path = content_result.partition("PDF_DIRECT_SAVE:")[2].strip()
                print(f"📄 PDF saved directly: {pdf_path}")

                # Verify the file exists at expected location.
                if Path(pdf_path).exists():
                    return True
                print(f"❌ PDF file not found at expected location: {pdf_path}")
                return False

            if content_result.startswith("PDF saved to:"):
                # Legacy marker - PDF was saved elsewhere, copy it over.
                # partition() on the same prefix as startswith() tolerates a
                # missing space after the colon (the old split("PDF saved to: ")
                # raised IndexError for that case and mis-reported a failure).
                import shutil
                pdf_path = content_result.partition("PDF saved to:")[2].strip()
                shutil.copy2(pdf_path, output_file)
                print(f"📄 PDF copied to standard location: {output_file}")
                return True

            # PDF generation failed upstream; keep the text so nothing is lost.
            fallback_file = output_file.with_suffix('.txt')
            with open(fallback_file, "w", encoding="utf-8") as f:
                f.write("PDF GENERATION FAILED - TEXT FALLBACK\n")
                f.write("="*50 + "\n\n")
                f.write(content_result)
            print(f"📄 PDF fallback saved as text: {fallback_file}")
            return True

        # Handle text-based formats: write verbatim.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content_result)
        print(f"📝 {fmt.upper()} content saved: {output_file}")
        return True

    except Exception as e:
        print(f"❌ Error saving content: {e}")
        return False
392
+
393
def determine_starting_url(prompt: str) -> str:
    """Pick a sensible landing page for the agent based on keywords in the goal."""
    lowered = prompt.lower()

    # Generic search / lookup goals start at a search engine.
    search_terms = ("search", "find", "look for", "google")
    if any(term in lowered for term in search_terms):
        return "https://duckduckgo.com/"

    # Source-code related goals start at GitHub.
    if "github" in lowered or "code repository" in lowered:
        return "https://www.github.com"

    # Shopping-flavoured goals start at Amazon.
    shopping_terms = ("buy", "purchase", "product", "price", "amazon")
    if any(term in lowered for term in shopping_terms):
        return "https://www.amazon.com"

    # Everything else falls back to the search engine as well.
    return "https://duckduckgo.com/"
411
+
412
def determine_max_steps(prompt: str) -> int:
    """Budget the agent's step count according to how complex the task sounds."""
    lowered = prompt.lower()

    # Keyword buckets checked in priority order; the first hit wins.
    budgets = (
        (("extract", "get info", "save", "download"), 15),          # simple extraction
        (("research", "analyze", "compare", "comprehensive"), 25),  # deep research
        (("fill", "submit", "register", "apply", "multiple"), 20),  # multi-step forms
        (("buy", "product", "price", "review"), 18),                # shopping
        (("job", "career", "position"), 20),                        # job hunting
    )
    for keywords, steps in budgets:
        if any(word in lowered for word in keywords):
            return steps

    # Nothing matched: use a middle-of-the-road budget.
    return 20
backend/anti_bot_detection.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import google.generativeai as genai
4
+ import json
5
+ import asyncio
6
+ import functools
7
+ from PIL import Image
8
+ import io
9
+
10
class AntiBotVisionModel:
    """Gemini-based vision helper for detecting and solving anti-bot challenges.

    Requires the GOOGLE_API_KEY environment variable for the Gemini API.
    """

    def __init__(self):
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")

    async def analyze_anti_bot_page(self, screenshot_b64: str, detection_prompt: str, page_url: str) -> dict:
        """Analyze page screenshot to detect anti-bot systems.

        Args:
            screenshot_b64: Base64-encoded page screenshot.
            detection_prompt: Instructions sent to the vision model.
            page_url: URL of the page (used only by the fallback parser).

        Returns:
            dict with keys: is_anti_bot, detection_type, confidence,
            description, can_solve, suggested_action. Never raises; on any
            failure a "retry" result is returned instead.
        """
        try:
            # Convert base64 to PIL Image
            image_data = base64.b64decode(screenshot_b64)
            image = Image.open(io.BytesIO(image_data))

            # Compress image for token efficiency
            max_size = (1024, 768)
            image.thumbnail(max_size, Image.Resampling.LANCZOS)

            # Create content for analysis
            content = [detection_prompt, image]

            # generate_content is blocking, so run it off the event loop.
            response = await asyncio.to_thread(
                functools.partial(self.model.generate_content, content)
            )

            raw_text = response.text
            print(f"🔍 Anti-bot detection response: {raw_text[:200]}...")

            # The model is asked for JSON but may wrap it in prose: carve out
            # the outermost {...} span before parsing.
            try:
                start = raw_text.find('{')
                end = raw_text.rfind('}') + 1

                if start != -1 and end > start:
                    json_str = raw_text[start:end]
                    result = json.loads(json_str)
                    return result
                else:
                    # Fallback parsing
                    return self._parse_fallback_response(raw_text, page_url)

            except json.JSONDecodeError:
                return self._parse_fallback_response(raw_text, page_url)

        except Exception as e:
            print(f"❌ Error in anti-bot vision analysis: {e}")
            return {
                "is_anti_bot": False,
                "detection_type": "none",
                "confidence": 0.0,
                "description": f"Analysis failed: {str(e)}",
                "can_solve": False,
                "suggested_action": "retry"
            }

    def _parse_fallback_response(self, raw_text: str, page_url: str) -> dict:
        """Fallback parsing when JSON extraction fails: keyword heuristics.

        Args:
            raw_text: Free-form model output to scan for anti-bot keywords.
            page_url: Unused here; kept for interface symmetry with callers.
        """
        text_lower = raw_text.lower()

        # Simple keyword detection as fallback; list order sets priority for
        # the reported detection_type.
        anti_bot_keywords = [
            "cloudflare", "captcha", "verification", "access denied",
            "blocked", "rate limit", "checking your browser", "security check",
            "automated traffic", "unusual activity"
        ]

        detected_keywords = [kw for kw in anti_bot_keywords if kw in text_lower]

        if detected_keywords:
            return {
                "is_anti_bot": True,
                "detection_type": detected_keywords[0],
                "confidence": 0.7,
                "description": f"Detected keywords: {', '.join(detected_keywords)}",
                # Only CAPTCHAs are considered solvable by the vision model.
                "can_solve": "captcha" in detected_keywords,
                "suggested_action": "solve_captcha" if "captcha" in detected_keywords else "rotate_proxy"
            }

        return {
            "is_anti_bot": False,
            "detection_type": "none",
            "confidence": 0.5,
            "description": "No clear anti-bot indicators found",
            "can_solve": False,
            "suggested_action": "continue"
        }

    async def solve_captcha(self, screenshot_b64: str, page_url: str, captcha_type: str) -> dict:
        """Attempt to solve CAPTCHA using vision model.

        Returns:
            dict with keys: can_solve, solution_type, solution, confidence,
            instructions. Never raises; failures produce can_solve=False.
        """
        try:
            # Convert base64 to PIL Image
            image_data = base64.b64decode(screenshot_b64)
            image = Image.open(io.BytesIO(image_data))

            captcha_prompt = f"""
            CAPTCHA SOLVING TASK:

            You are looking at a CAPTCHA challenge on: {page_url}
            CAPTCHA Type: {captcha_type}

            Analyze the image and provide the solution:

            For text CAPTCHAs:
            - Read and transcribe the text/numbers exactly as shown

            For image selection CAPTCHAs:
            - Identify which images match the requested criteria
            - Provide grid positions or image descriptions

            For math CAPTCHAs:
            - Solve the mathematical expression

            Respond with JSON:
            {{
                "can_solve": true/false,
                "solution_type": "text|selection|math|unknown",
                "solution": "the answer or list of selections",
                "confidence": 0.0-1.0,
                "instructions": "step by step what to do"
            }}
            """

            content = [captcha_prompt, image]

            response = await asyncio.to_thread(
                functools.partial(self.model.generate_content, content)
            )

            raw_text = response.text

            # Parse response: extract the outermost JSON object if present.
            # Narrowed from a bare `except:` so genuine bugs are not hidden;
            # only a malformed JSON payload falls through to the default result.
            try:
                start = raw_text.find('{')
                end = raw_text.rfind('}') + 1

                if start != -1 and end > start:
                    json_str = raw_text[start:end]
                    return json.loads(json_str)
            except json.JSONDecodeError:
                pass

            return {
                "can_solve": False,
                "solution_type": "unknown",
                "solution": "",
                "confidence": 0.0,
                "instructions": "Could not parse CAPTCHA solution"
            }

        except Exception as e:
            print(f"❌ Error solving CAPTCHA: {e}")
            return {
                "can_solve": False,
                "solution_type": "error",
                "solution": "",
                "confidence": 0.0,
                "instructions": f"CAPTCHA solving failed: {str(e)}"
            }
backend/browser_controller.py ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import subprocess
3
+ import os
4
+ import logging
5
+ import json
6
+ import base64
7
+ from typing import Optional, Dict, List, Any, Tuple
8
+ import hashlib
9
+ from dataclasses import dataclass, asdict
10
+ from pydantic import BaseModel
11
+ from pathlib import Path
12
+
13
+ # Set up logging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+ from playwright.async_api import async_playwright, Page, CDPSession
18
+
19
@dataclass
class ElementInfo:
    """DOM element information compatible with browser-use.

    One record per interactive/extracted element; `index` doubles as the key
    in PageState.selector_map.
    """
    index: int  # position in the interactive-element ordering (selector_map key)
    id: str  # identifier assigned during DOM extraction
    tag_name: str  # HTML tag name of the element
    xpath: str  # XPath locator for the element
    css_selector: str  # CSS locator for the element
    text: str  # visible text content
    attributes: Dict[str, str]  # raw HTML attribute name -> value
    is_clickable: bool  # element accepts click interactions
    is_input: bool  # element accepts text input
    is_visible: bool = True  # element is rendered (defaults optimistic)
    is_in_viewport: bool = True  # element lies within the current viewport
    input_type: Optional[str] = None  # <input type=...> value, when applicable
    placeholder: Optional[str] = None  # placeholder text for input elements
    bounding_box: Optional[Dict[str, float]] = None  # element box — presumably {x, y, width, height} in page coords; confirm against the DOM-extraction JS
    center_coordinates: Optional[Dict[str, float]] = None  # element center point
    viewport_coordinates: Optional[Dict[str, float]] = None  # position relative to the viewport
38
+
39
class PageState:
    """Snapshot of a rendered page: identity, extracted elements, optional screenshot.

    Compatible with browser-use's page-state shape.
    """

    def __init__(self, url: str, title: str, elements: List[ElementInfo], selector_map: Dict[int, ElementInfo], screenshot: Optional[str] = None):
        self.screenshot = screenshot
        self.selector_map = selector_map
        self.elements = elements
        self.title = title
        self.url = url
        # Pre-computed capability views over the element inventory.
        self.clickable_elements = [item for item in elements if item.is_clickable]
        self.input_elements = [item for item in elements if item.is_input]
49
+
50
+ class BrowserController:
51
    def __init__(self, headless: bool, proxy: dict | None, enable_streaming: bool = False):
        """Store configuration and initialize session state; no I/O happens here.

        Args:
            headless: Run Chromium without a visible window.
            proxy: Optional Playwright proxy settings dict, or None.
            enable_streaming: Wire up a CDP screencast session during __aenter__.
        """
        self.headless = headless
        self.proxy = proxy
        self.enable_streaming = enable_streaming
        # Playwright handles, populated by __aenter__.
        self.play = None
        self.browser = None
        self.page = None
        self.cdp_session = None
        # Live-streaming state.
        self.streaming_active = False
        self.stream_clients = set()
        # Page-state caching bookkeeping.
        self._cached_page_state = None
        self._cached_url = None
        self._last_action_timestamp = None
        self.input_enabled = False  # Track if Input domain is available
        # Xvfb bookkeeping for headful mode on display-less hosts.
        self._original_display = os.environ.get("DISPLAY")
        self._display_was_set = False
        self._xvfb_process: subprocess.Popen | None = None
        self._xvfb_display: str | None = None

        # Load the robust DOM extraction JavaScript
        self.dom_js = self._get_dom_extraction_js()
72
+
73
+ def _find_free_display(self, start: int = 99, end: int = 110) -> int:
74
+ """Locate a free X display number for Xvfb."""
75
+ for display in range(start, end):
76
+ lock_file = Path(f"/tmp/.X{display}-lock")
77
+ if not lock_file.exists():
78
+ return display
79
+ # Fall back to the starting display even if locked (Xvfb will fail clearly)
80
+ return start
81
+
82
+ def _terminate_xvfb(self):
83
+ """Stop the Xvfb process if it was started."""
84
+ if not self._xvfb_process:
85
+ return
86
+
87
+ self._xvfb_process.terminate()
88
+ try:
89
+ self._xvfb_process.wait(timeout=5)
90
+ except subprocess.TimeoutExpired:
91
+ self._xvfb_process.kill()
92
+ finally:
93
+ self._xvfb_process = None
94
+ self._xvfb_display = None
95
+
96
    async def _ensure_display(self):
        """Start a virtual X server when running in headful mode without DISPLAY.

        If Xvfb cannot be started (missing binary, premature exit, or startup
        timeout) the controller silently degrades to headless mode instead of
        failing. On success, DISPLAY is pointed at the new virtual display.
        """
        # Nothing to do when headless or a real display already exists.
        if self.headless or os.environ.get("DISPLAY"):
            return

        display_number = self._find_free_display()
        display = f":{display_number}"
        xvfb_cmd = [
            "Xvfb",
            display,
            "-screen",
            "0",
            "1280x800x24",
            "-nolisten",
            "tcp",
        ]

        try:
            self._xvfb_process = subprocess.Popen(
                xvfb_cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            self._xvfb_display = display
            logger.info("🖥️ Started Xvfb on display %s for headful browser session", display)
        except FileNotFoundError:
            # Xvfb binary not installed on this host.
            logger.warning("⚠️ Xvfb not available; falling back to headless mode")
            self.headless = True
            return

        # Wait briefly for Xvfb to be ready (up to ~3s: 30 polls x 0.1s),
        # using the appearance of its /tmp lock file as the readiness signal.
        ready = False
        for _ in range(30):
            if self._xvfb_process.poll() is not None:
                logger.error(
                    "❌ Xvfb exited prematurely with code %s", self._xvfb_process.returncode
                )
                self._terminate_xvfb()
                self.headless = True
                return
            if Path(f"/tmp/.X{display_number}-lock").exists():
                ready = True
                break
            await asyncio.sleep(0.1)

        if not ready:
            logger.warning("⚠️ Timed out waiting for Xvfb; falling back to headless mode")
            self._terminate_xvfb()
            self.headless = True
            return

        # Point the process at the virtual display; _restore_display undoes this.
        os.environ["DISPLAY"] = display
        self._display_was_set = True
149
+
150
+ def _restore_display(self):
151
+ """Restore the DISPLAY environment variable and stop Xvfb if needed."""
152
+ self._terminate_xvfb()
153
+
154
+ if not self._display_was_set:
155
+ return
156
+
157
+ if self._original_display is None:
158
+ os.environ.pop("DISPLAY", None)
159
+ else:
160
+ os.environ["DISPLAY"] = self._original_display
161
+
162
+ self._display_was_set = False
163
+
164
    async def __aenter__(self):
        """Initialize browser with CDP streaming support.

        Ensures a display exists (starting Xvfb in headful mode if needed),
        launches Chromium with anti-automation-detection flags, opens a
        1280x800 page, optionally sets up CDP streaming, and pins a desktop
        User-Agent header. Returns self for `async with` use.
        """
        await self._ensure_display()

        self.play = await async_playwright().start()

        launch_options = {
            "headless": self.headless,
            "args": [
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-web-security",
                "--disable-features=VizDisplayCompositor",
                "--window-size=1280,800",
                "--window-position=0,0",
                # Hides the navigator.webdriver automation signal.
                "--disable-blink-features=AutomationControlled",
                "--disable-extensions",
                "--no-first-run",
                "--disable-default-apps",
                # Enable remote debugging for CDP
                "--remote-debugging-port=0"  # Use random port
            ]
        }

        if self.proxy:
            launch_options["proxy"] = self.proxy

        self.browser = await self.play.chromium.launch(**launch_options)
        self.page = await self.browser.new_page(viewport={"width": 1280, "height": 800})

        # Set up CDP session for streaming
        if self.enable_streaming:
            await self._setup_cdp_streaming()

        # Fixed desktop UA so the session looks less like an automated browser.
        await self.page.set_extra_http_headers({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

        return self
204
+
205
+ async def __aexit__(self, exc_type, exc, tb):
206
+ """Cleanup browser and CDP session"""
207
+ if self.streaming_active:
208
+ await self._stop_cdp_streaming()
209
+ if self.browser:
210
+ await self.browser.close()
211
+ if self.play:
212
+ await self.play.stop()
213
+ self._restore_display()
214
+
215
    async def _setup_cdp_streaming(self):
        """Setup CDP session for real-time streaming with proper error handling.

        Creates a page-scoped CDP session and enables the Runtime and Page
        domains (required); the Input domain is optional and its availability
        is recorded in self.input_enabled.

        Raises:
            Any exception from session creation or a required domain enable.
        """
        try:
            # Get CDP session - ensure it's for the page target
            self.cdp_session = await self.page.context.new_cdp_session(self.page)

            # Enable required domains with error handling
            await self._enable_cdp_domain('Runtime')
            await self._enable_cdp_domain('Page')

            # Try to enable Input domain (optional)
            self.input_enabled = await self._enable_cdp_domain('Input', optional=True)

            if self.input_enabled:
                logger.info("✅ Input domain enabled - full interaction support available")
            else:
                logger.warning("⚠️ Input domain not available - using Playwright for interactions")

            logger.info("✅ CDP session established for streaming")

        except Exception as e:
            logger.error(f"❌ Failed to setup CDP streaming: {e}")
            raise
238
+
239
+ async def _enable_cdp_domain(self, domain: str, optional: bool = False) -> bool:
240
+ """Enable a CDP domain with proper error handling"""
241
+ try:
242
+ await self.cdp_session.send(f'{domain}.enable')
243
+ logger.info(f"✅ {domain} domain enabled")
244
+ return True
245
+ except Exception as e:
246
+ if optional:
247
+ logger.warning(f"⚠️ {domain} domain not available: {e}")
248
+ return False
249
+ else:
250
+ logger.error(f"❌ Required {domain} domain failed: {e}")
251
+ raise
252
+
253
    async def start_streaming(self, quality: int = 80):
        """Start CDP screencast streaming with enhanced error handling.

        Args:
            quality: JPEG quality for screencast frames.

        Raises:
            RuntimeError: if no CDP session has been initialized.

        On screencast failure, degrades to screenshot polling instead of raising.
        """
        if not self.cdp_session:
            raise RuntimeError("CDP session not initialized")

        try:
            # Check if Page.startScreencast is available
            await self.cdp_session.send('Page.startScreencast', {
                'format': 'jpeg',
                'quality': quality,
                'maxWidth': 1280,
                'maxHeight': 800,
                'everyNthFrame': 1  # Stream every frame for real-time
            })

            # Set up frame listener
            self.cdp_session.on('Page.screencastFrame', self._handle_screencast_frame)

            self.streaming_active = True
            logger.info("🎥 CDP streaming started successfully")

        except Exception as e:
            logger.error(f"❌ Failed to start CDP streaming: {e}")
            # Try alternative approach with screenshots
            await self._start_screenshot_polling()
278
+
279
+ async def _start_screenshot_polling(self):
280
+ """Fallback: Use screenshot polling if screencast not available"""
281
+ logger.info("🔄 Starting screenshot polling as fallback")
282
+ self.streaming_active = True
283
+
284
+ async def screenshot_loop():
285
+ while self.streaming_active:
286
+ try:
287
+ screenshot_bytes = await self.page.screenshot(type='jpeg', quality=80)
288
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')
289
+
290
+ frame_data = {
291
+ 'type': 'frame',
292
+ 'data': screenshot_b64,
293
+ 'timestamp': asyncio.get_event_loop().time(),
294
+ 'method': 'polling'
295
+ }
296
+
297
+ await self._broadcast_to_clients(frame_data)
298
+ await asyncio.sleep(0.1) # 10 FPS
299
+
300
+ except Exception as e:
301
+ logger.error(f"Screenshot polling error: {e}")
302
+ await asyncio.sleep(1)
303
+
304
+ # Start screenshot polling in background
305
+ asyncio.create_task(screenshot_loop())
306
+
307
+ async def stop_streaming(self):
308
+ """Stop CDP screencast streaming"""
309
+ if self.cdp_session and self.streaming_active:
310
+ try:
311
+ await self.cdp_session.send('Page.stopScreencast')
312
+ logger.info("🛑 CDP streaming stopped")
313
+ except Exception as e:
314
+ logger.warning(f"⚠️ Error stopping screencast (may not have been active): {e}")
315
+ finally:
316
+ self.streaming_active = False
317
+
318
+ async def _stop_cdp_streaming(self):
319
+ """Internal cleanup for CDP streaming"""
320
+ await self.stop_streaming()
321
+ if self.cdp_session:
322
+ try:
323
+ await self.cdp_session.detach()
324
+ except Exception as e:
325
+ logger.warning(f"⚠️ Error detaching CDP session: {e}")
326
+
327
+ async def _handle_screencast_frame(self, params):
328
+ """Handle incoming screencast frames"""
329
+ try:
330
+ # Acknowledge frame immediately
331
+ await self.cdp_session.send('Page.screencastFrameAck', {
332
+ 'sessionId': params['sessionId']
333
+ })
334
+
335
+ # Broadcast frame to all connected clients
336
+ frame_data = {
337
+ 'type': 'frame',
338
+ 'data': params['data'], # Base64 encoded JPEG
339
+ 'timestamp': params.get('timestamp'),
340
+ 'method': 'screencast',
341
+ 'metadata': {
342
+ 'sessionId': params['sessionId']
343
+ }
344
+ }
345
+
346
+ # Send to all connected streaming clients
347
+ await self._broadcast_to_clients(frame_data)
348
+
349
+ except Exception as e:
350
+ logger.error(f"❌ Error handling screencast frame: {e}")
351
+
352
+ async def _broadcast_to_clients(self, data):
353
+ """Broadcast data to all connected streaming clients"""
354
+ if not self.stream_clients:
355
+ return
356
+
357
+ disconnected_clients = []
358
+ for client in self.stream_clients:
359
+ try:
360
+ await client.send_text(json.dumps(data))
361
+ except Exception:
362
+ disconnected_clients.append(client)
363
+
364
+ # Remove disconnected clients
365
+ for client in disconnected_clients:
366
+ self.stream_clients.discard(client)
367
+
368
    def add_stream_client(self, websocket):
        """Add a new streaming client"""
        # Set semantics: re-adding an already-connected socket is a no-op.
        self.stream_clients.add(websocket)
        logger.info(f"🔗 Stream client connected. Total clients: {len(self.stream_clients)}")
372
+
373
    def remove_stream_client(self, websocket):
        """Remove a streaming client"""
        # discard() (not remove()) so unknown/already-removed sockets don't raise.
        self.stream_clients.discard(websocket)
        logger.info(f"🔌 Stream client disconnected. Total clients: {len(self.stream_clients)}")
377
+
378
+ async def handle_mouse_event(self, event_data):
379
+ """Handle mouse events with fallback support"""
380
+ try:
381
+ if self.input_enabled and self.cdp_session:
382
+ # Use CDP Input domain if available
383
+ await self.cdp_session.send('Input.dispatchMouseEvent', {
384
+ 'type': event_data['eventType'],
385
+ 'x': event_data['x'],
386
+ 'y': event_data['y'],
387
+ 'button': event_data.get('button', 'left'),
388
+ 'clickCount': event_data.get('clickCount', 1)
389
+ })
390
+ else:
391
+ # Fallback to Playwright mouse actions
392
+ if event_data['eventType'] == 'mousePressed':
393
+ await self.page.mouse.click(event_data['x'], event_data['y'])
394
+ elif event_data['eventType'] == 'mouseMoved':
395
+ await self.page.mouse.move(event_data['x'], event_data['y'])
396
+
397
+ except Exception as e:
398
+ logger.error(f"❌ Error handling mouse event: {e}")
399
+
400
+ async def handle_keyboard_event(self, event_data):
401
+ """Handle keyboard events with fallback support"""
402
+ try:
403
+ if self.input_enabled and self.cdp_session:
404
+ # Use CDP Input domain if available
405
+ await self.cdp_session.send('Input.dispatchKeyEvent', {
406
+ 'type': event_data['eventType'],
407
+ 'text': event_data.get('text', ''),
408
+ 'key': event_data.get('key', ''),
409
+ 'code': event_data.get('code', ''),
410
+ 'keyCode': event_data.get('keyCode', 0)
411
+ })
412
+ else:
413
+ # Fallback to Playwright keyboard actions
414
+ if event_data['eventType'] == 'keyDown' and event_data.get('key'):
415
+ await self.page.keyboard.press(event_data['key'])
416
+ elif event_data.get('text'):
417
+ await self.page.keyboard.type(event_data['text'])
418
+
419
+ except Exception as e:
420
+ logger.error(f"❌ Error handling keyboard event: {e}")
421
+
422
+ def get_streaming_info(self):
423
+ """Get streaming connection information"""
424
+ if self.enable_streaming:
425
+ return {
426
+ "enabled": True,
427
+ "active": self.streaming_active,
428
+ "clients": len(self.stream_clients),
429
+ "websocket_url": "ws://localhost:8000/stream",
430
+ "input_enabled": self.input_enabled,
431
+ "method": "screencast" if self.input_enabled else "polling"
432
+ }
433
+ return {"enabled": False}
434
+
435
+ # Keep all your existing methods from the original code
436
    def _get_dom_extraction_js(self) -> str:
        """Return the JavaScript source injected via page.evaluate() to index the DOM.

        The script walks every element and keeps those that are visible in the
        viewport or interactive. Interactive/input elements get incrementing
        highlight indices (optionally drawn as red outlines plus numeric labels
        appended to document.body), and the script returns an object with
        `elements`, `selectorMap` (highlight index -> element data) and `stats`.
        Note: the `xpath` and `cssSelector` fields are currently always empty
        strings, and `elementMap`/`getClassName`/`debugMode` are declared but
        unused.
        """
        return """
        (args) => {
            const { doHighlightElements = true, debugMode = false } = args || {};

            // Performance tracking
            const startTime = performance.now();
            let nodeCount = 0;
            let processedCount = 0;

            // Results
            const elementMap = new Map();
            const selectorMap = {};
            let highlightIndex = 0;

            // Helper functions
            function getClassName(element) {
                if (!element.className) return '';
                if (typeof element.className === 'string') return element.className;
                if (element.className.toString) return element.className.toString();
                if (element.classList && element.classList.length > 0) {
                    return Array.from(element.classList).join(' ');
                }
                return '';
            }

            function isInteractive(element) {
                const tagName = element.tagName.toLowerCase();
                const interactiveTags = ['a', 'button', 'input', 'select', 'textarea', 'label'];
                if (interactiveTags.includes(tagName)) return true;
                if (element.onclick || element.getAttribute('onclick')) return true;
                if (element.getAttribute('role') === 'button') return true;
                if (element.getAttribute('role') === 'link') return true;
                if (element.hasAttribute('tabindex')) return true;
                if (element.contentEditable === 'true') return true;
                const style = window.getComputedStyle(element);
                if (style.cursor === 'pointer') return true;
                return false;
            }

            function isInput(element) {
                const tagName = element.tagName.toLowerCase();
                return ['input', 'textarea', 'select'].includes(tagName) ||
                       element.contentEditable === 'true';
            }

            function getTextContent(element) {
                let text = '';
                if (element.textContent) {
                    text = element.textContent.trim();
                }
                if (element.value) {
                    text = element.value;
                } else if (element.placeholder) {
                    text = element.placeholder;
                }
                if (element.tagName === 'IMG' && element.alt) {
                    text = element.alt;
                }
                return text.substring(0, 200);
            }

            function isVisibleAndInViewport(element) {
                const rect = element.getBoundingClientRect();
                const style = window.getComputedStyle(element);
                const hasDimensions = rect.width > 0 && rect.height > 0;
                const isVisible = style.visibility !== 'hidden' &&
                                  style.display !== 'none' &&
                                  style.opacity !== '0';
                const isInViewport = rect.top < window.innerHeight &&
                                     rect.bottom > 0 &&
                                     rect.left < window.innerWidth &&
                                     rect.right > 0;
                return hasDimensions && isVisible && isInViewport;
            }

            // Process elements
            const allElements = document.querySelectorAll('*');
            const elements = [];

            allElements.forEach(element => {
                nodeCount++;
                if (!element || element.nodeType !== 1) return;

                const isElementVisible = isVisibleAndInViewport(element);
                const isElementInteractive = isInteractive(element);
                const isElementInput = isInput(element);

                if (!isElementVisible && !isElementInteractive) return;

                processedCount++;
                const rect = element.getBoundingClientRect();
                const elementId = `element_${processedCount}`;
                let currentHighlightIndex = null;

                if (isElementInteractive || isElementInput) {
                    currentHighlightIndex = highlightIndex++;

                    if (doHighlightElements) {
                        element.style.outline = '2px solid red';
                        element.style.outlineOffset = '1px';

                        const label = document.createElement('div');
                        label.textContent = currentHighlightIndex.toString();
                        label.style.cssText = `
                            position: absolute;
                            top: ${rect.top + window.scrollY - 20}px;
                            left: ${rect.left + window.scrollX}px;
                            background: red;
                            color: white;
                            padding: 2px 6px;
                            font-size: 12px;
                            font-weight: bold;
                            z-index: 10000;
                            border-radius: 3px;
                            pointer-events: none;
                        `;
                        document.body.appendChild(label);
                    }
                }

                const elementData = {
                    index: currentHighlightIndex,
                    id: elementId,
                    tagName: element.tagName.toLowerCase(),
                    xpath: '',
                    cssSelector: '',
                    text: getTextContent(element),
                    attributes: {},
                    isClickable: isElementInteractive,
                    isInput: isElementInput,
                    isVisible: isElementVisible,
                    isInViewport: isElementVisible,
                    inputType: element.type || null,
                    placeholder: element.placeholder || null,
                    boundingBox: {
                        x: rect.x,
                        y: rect.y,
                        width: rect.width,
                        height: rect.height,
                        top: rect.top,
                        bottom: rect.bottom,
                        left: rect.left,
                        right: rect.right
                    },
                    centerCoordinates: {
                        x: rect.left + rect.width / 2,
                        y: rect.top + rect.height / 2
                    }
                };

                if (element.attributes) {
                    for (let attr of element.attributes) {
                        elementData.attributes[attr.name] = attr.value;
                    }
                }

                elements.push(elementData);

                if (currentHighlightIndex !== null) {
                    selectorMap[currentHighlightIndex] = elementData;
                }
            });

            const endTime = performance.now();
            return {
                elements: elements,
                selectorMap: selectorMap,
                stats: {
                    totalNodes: nodeCount,
                    processedNodes: processedCount,
                    interactiveElements: Object.keys(selectorMap).length,
                    executionTime: endTime - startTime
                }
            };
        }
        """
614
+
615
+ # Add all your existing methods here (goto, get_page_state, click_element_by_index, etc.)
616
+ async def goto(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000):
617
+ """Navigate to a URL with proper waiting"""
618
+ try:
619
+ logger.info(f"Navigating to: {url}")
620
+ await self.page.goto(url, wait_until=wait_until, timeout=timeout)
621
+ await asyncio.sleep(2)
622
+ logger.info(f"Successfully navigated to: {url}")
623
+ except Exception as e:
624
+ logger.error(f"Failed to navigate to {url}: {e}")
625
+ raise
626
+
627
    async def get_page_state(self, include_screenshot: bool = True, highlight_elements: bool = True) -> PageState:
        """Snapshot the current page: URL, title, interactive elements and screenshot.

        Failure handling is layered: if DOM extraction fails, a PageState with
        url/title/screenshot but no elements is returned; if anything else
        fails, a completely empty PageState is returned. Never raises.
        """
        try:
            await self.page.wait_for_load_state("domcontentloaded", timeout=10000)
            await asyncio.sleep(1)  # settle delay for late-rendered content

            url = self.page.url
            title = await self.page.title()

            screenshot = None
            if include_screenshot:
                # Viewport-sized capture only; full-page screenshots are not taken.
                screenshot_bytes = await self.page.screenshot(
                    full_page=False,
                    clip={'x': 0, 'y': 0, 'width': 1250, 'height': 800}
                )
                screenshot = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Extract DOM elements
            try:
                dom_result = await self.page.evaluate(self.dom_js, {"doHighlightElements": highlight_elements})
                logger.info(f"Extracted {len(dom_result.get('elements', []))} interactive elements")
            except Exception as e:
                # Non-fatal: fall back to a state with url/title but no elements.
                logger.error(f"DOM extraction failed: {e}")
                return PageState(url, title, [], {}, screenshot)

            elements = []
            selector_map = {}

            for elem_data in dom_result.get('elements', []):
                element_info = ElementInfo(
                    index=elem_data.get('index'),
                    id=elem_data.get('id', ''),
                    tag_name=elem_data.get('tagName', ''),
                    xpath=elem_data.get('xpath', ''),
                    css_selector=elem_data.get('cssSelector', ''),
                    text=elem_data.get('text', ''),
                    attributes=elem_data.get('attributes', {}),
                    is_clickable=elem_data.get('isClickable', False),
                    is_input=elem_data.get('isInput', False),
                    center_coordinates=elem_data.get('centerCoordinates')
                )

                elements.append(element_info)
                # Only interactive/input elements carry a highlight index.
                if element_info.index is not None:
                    selector_map[element_info.index] = element_info

            return PageState(url, title, elements, selector_map, screenshot)

        except Exception as e:
            logger.error(f"Failed to get page state: {e}")
            return PageState("", "", [], {}, None)
678
+
679
+ async def click_element_by_index(self, index: int, page_state: PageState = None) -> bool:
680
+ """Click element by index"""
681
+ try:
682
+ if page_state is None:
683
+ page_state = await self.get_page_state(include_screenshot=False, highlight_elements=False)
684
+
685
+ if index not in page_state.selector_map:
686
+ logger.error(f"Element with index {index} not found")
687
+ return False
688
+
689
+ element = page_state.selector_map[index]
690
+ if not element.center_coordinates:
691
+ logger.error(f"Element at index {index} has no coordinates")
692
+ return False
693
+
694
+ x = element.center_coordinates['x']
695
+ y = element.center_coordinates['y']
696
+
697
+ logger.info(f"Clicking element {index}: {element.text[:50]}... at ({x}, {y})")
698
+
699
+ await self.page.mouse.click(x, y)
700
+ await asyncio.sleep(1)
701
+
702
+ logger.info(f"Successfully clicked element {index}")
703
+ return True
704
+
705
+ except Exception as e:
706
+ logger.error(f"Failed to click element at index {index}: {e}")
707
+ return False
708
+
709
+ async def input_text_by_index(self, index: int, text: str, page_state: PageState = None) -> bool:
710
+ """Input text into element by index"""
711
+ try:
712
+ if page_state is None:
713
+ page_state = await self.get_page_state(include_screenshot=False, highlight_elements=False)
714
+
715
+ if index not in page_state.selector_map:
716
+ logger.error(f"Element with index {index} not found")
717
+ return False
718
+
719
+ element = page_state.selector_map[index]
720
+ if not element.center_coordinates:
721
+ logger.error(f"Element at index {index} has no coordinates")
722
+ return False
723
+
724
+ x = element.center_coordinates['x']
725
+ y = element.center_coordinates['y']
726
+
727
+ logger.info(f"Typing '{text}' into element {index}")
728
+
729
+ await self.page.mouse.click(x, y)
730
+ await asyncio.sleep(0.5)
731
+ await self.page.keyboard.press('Control+a')
732
+ await self.page.keyboard.type(text)
733
+
734
+ logger.info(f"Successfully typed text into element {index}")
735
+ return True
736
+
737
+ except Exception as e:
738
+ logger.error(f"Failed to input text into element at index {index}: {e}")
739
+ return False
740
+
741
+ async def scroll_page(self, direction: str = "down", amount: int = 500):
742
+ """Scroll the page"""
743
+ if direction == "down":
744
+ await self.page.mouse.wheel(0, amount)
745
+ elif direction == "up":
746
+ await self.page.mouse.wheel(0, -amount)
747
+ await asyncio.sleep(1)
748
+
749
+ async def press_key(self, key: str) -> bool:
750
+ """Press a keyboard key"""
751
+ try:
752
+ await self.page.keyboard.press(key)
753
+ logger.info(f"Pressed key: {key}")
754
+ return True
755
+ except Exception as e:
756
+ logger.error(f"Failed to press key {key}: {e}")
757
+ return False
backend/cdp_streamer.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/cdp_streamer.py
2
+ import asyncio
3
+ import json
4
+ import websockets
5
+ from playwright.async_api import CDPSession
6
+
7
class CDPBrowserStreamer:
    """Streams live browser frames over a standalone WebSocket server using
    CDP `Page.startScreencast`, and relays client mouse/keyboard events back
    through the CDP `Input` domain.
    """

    def __init__(self, page):
        # Playwright Page whose content is streamed.
        self.page = page
        # BUGFIX: CDPSession must be obtained from page.context.new_cdp_session();
        # the original instantiated Playwright's internal CDPSession() directly,
        # which is not a public constructor. Start with None until streaming begins.
        self.cdp_session = None
        self.streaming = False

    async def start_streaming(self, websocket_port: int = 8080):
        """Start CDP-based streaming and serve frames on `websocket_port`.

        Errors are logged and swallowed — the caller can retry.
        """
        try:
            # Get CDP session from Playwright page
            self.cdp_session = await self.page.context.new_cdp_session(self.page)

            # Enable necessary CDP domains
            await self.cdp_session.send('Runtime.enable')
            await self.cdp_session.send('Page.enable')
            await self.cdp_session.send('Page.startScreencast', {
                'format': 'jpeg',
                'quality': 80,
                'maxWidth': 1280,
                'maxHeight': 800,
                'everyNthFrame': 1  # Stream every frame for real-time
            })

            # Start WebSocket server for streaming
            await websockets.serve(self.handle_client, "localhost", websocket_port)
            print(f"🎥 CDP Streaming started on port {websocket_port}")

        except Exception as e:
            print(f"❌ Failed to start CDP streaming: {e}")

    async def handle_client(self, websocket, path=None):
        """Handle one WebSocket client: push frames, accept input events.

        `path` defaults to None for compatibility with websockets>=11, which
        invokes connection handlers with a single argument.
        """
        print("🔗 Client connected to CDP stream")

        try:
            # NOTE(review): every connected client registers another
            # screencastFrame listener, so with multiple clients each frame is
            # acked once per client. Fine for a single viewer — revisit for many.
            self.cdp_session.on('Page.screencastFrame', lambda params:
                asyncio.create_task(self.send_frame(websocket, params)))

            # Keep connection alive and handle client messages
            async for message in websocket:
                data = json.loads(message)
                if data['type'] == 'mouse':
                    await self.handle_mouse_event(data)
                elif data['type'] == 'keyboard':
                    await self.handle_keyboard_event(data)

        except websockets.exceptions.ConnectionClosed:
            print("🔌 Client disconnected from CDP stream")

    async def send_frame(self, websocket, params):
        """Forward one screencast frame to `websocket`, then ack it to Chrome."""
        try:
            frame_data = {
                'type': 'frame',
                'data': params['data'],  # Base64 encoded JPEG
                'metadata': {
                    'sessionId': params['sessionId'],
                    'timestamp': params.get('timestamp')
                }
            }
            await websocket.send(json.dumps(frame_data))

            # Acknowledge frame so Chrome keeps streaming.
            await self.cdp_session.send('Page.screencastFrameAck', {
                'sessionId': params['sessionId']
            })
        except Exception as e:
            print(f"❌ Error sending frame: {e}")

    async def handle_mouse_event(self, data):
        """Dispatch a client mouse event through CDP Input."""
        await self.cdp_session.send('Input.dispatchMouseEvent', {
            'type': data['eventType'],  # 'mousePressed', 'mouseReleased', 'mouseMoved'
            'x': data['x'],
            'y': data['y'],
            'button': data.get('button', 'left'),
            'clickCount': data.get('clickCount', 1)
        })

    async def handle_keyboard_event(self, data):
        """Dispatch a client keyboard event through CDP Input."""
        await self.cdp_session.send('Input.dispatchKeyEvent', {
            'type': data['eventType'],  # 'keyDown', 'keyUp', 'char'
            'text': data.get('text', ''),
            'key': data.get('key', ''),
            'code': data.get('code', '')
        })
+ })
backend/database.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Neon PostgreSQL Database Integration
3
+ Stores job history, results, and metadata
4
+ """
5
import json
import os
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import asyncpg
10
+
11
class Database:
    """Thin async wrapper around a Neon/PostgreSQL connection pool (asyncpg).

    All methods degrade gracefully: when DATABASE_URL is unset or connection
    failed, `self.pool` stays None and every operation is a no-op returning
    False / None / [] / {} as appropriate.
    """

    def __init__(self):
        # Pool is created lazily in connect(); None means "database disabled".
        self.pool: Optional[asyncpg.Pool] = None
        self.database_url = os.getenv("DATABASE_URL")

    async def connect(self):
        """Initialize the connection pool and create tables; never raises."""
        if not self.database_url:
            print("⚠️ DATABASE_URL not set, database features disabled")
            return

        try:
            self.pool = await asyncpg.create_pool(
                self.database_url,
                min_size=2,
                max_size=10,
                command_timeout=60
            )
            await self.init_tables()
            print("✅ Database connected successfully")
        except Exception as e:
            print(f"❌ Database connection failed: {e}")
            self.pool = None  # leave the instance in "disabled" mode

    async def disconnect(self):
        """Close database connections"""
        if self.pool:
            await self.pool.close()
            print("🔌 Database disconnected")

    async def init_tables(self):
        """Create the jobs / job_results / proxy_stats tables if missing."""
        if not self.pool:
            return

        async with self.pool.acquire() as conn:
            # Jobs table — one row per submitted agent job.
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS jobs (
                    id TEXT PRIMARY KEY,
                    prompt TEXT NOT NULL,
                    format TEXT NOT NULL,
                    status TEXT DEFAULT 'pending',
                    created_at TIMESTAMPTZ DEFAULT NOW(),
                    completed_at TIMESTAMPTZ,
                    file_extension TEXT,
                    content_type TEXT,
                    proxy_server TEXT,
                    headless BOOLEAN DEFAULT FALSE,
                    streaming_enabled BOOLEAN DEFAULT FALSE,
                    error_message TEXT
                )
            """)

            # Job results table (stores extracted content metadata)
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS job_results (
                    id SERIAL PRIMARY KEY,
                    job_id TEXT REFERENCES jobs(id) ON DELETE CASCADE,
                    content_length INTEGER,
                    extraction_time TIMESTAMPTZ DEFAULT NOW(),
                    format TEXT,
                    metadata JSONB
                )
            """)

            # Proxy usage tracking
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS proxy_stats (
                    id SERIAL PRIMARY KEY,
                    job_id TEXT REFERENCES jobs(id) ON DELETE SET NULL,
                    proxy_server TEXT,
                    success BOOLEAN,
                    error_message TEXT,
                    recorded_at TIMESTAMPTZ DEFAULT NOW()
                )
            """)

            print("📊 Database tables initialized")

    async def create_job(self, job_id: str, prompt: str, format: str,
                         headless: bool = False, streaming_enabled: bool = False,
                         proxy_server: Optional[str] = None) -> bool:
        """Insert a new job row with status 'running'; return True on success."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO jobs (id, prompt, format, headless, streaming_enabled, proxy_server, status)
                    VALUES ($1, $2, $3, $4, $5, $6, 'running')
                """, job_id, prompt, format, headless, streaming_enabled, proxy_server)
            return True
        except Exception as e:
            print(f"❌ Failed to create job: {e}")
            return False

    async def update_job_status(self, job_id: str, status: str,
                                error_message: Optional[str] = None) -> bool:
        """Update a job's status; stamps completed_at for terminal states."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                # BUGFIX: use a timezone-aware UTC timestamp. The column is
                # TIMESTAMPTZ and datetime.utcnow() is naive (and deprecated
                # since Python 3.12).
                completed_at = datetime.now(timezone.utc) if status in ['completed', 'failed'] else None
                await conn.execute("""
                    UPDATE jobs
                    SET status = $2,
                        completed_at = $3,
                        error_message = $4
                    WHERE id = $1
                """, job_id, status, completed_at, error_message)
            return True
        except Exception as e:
            print(f"❌ Failed to update job status: {e}")
            return False

    async def update_job_info(self, job_id: str, file_extension: str,
                              content_type: str) -> bool:
        """Record the output file's extension and MIME type for a job."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    UPDATE jobs
                    SET file_extension = $2, content_type = $3
                    WHERE id = $1
                """, job_id, file_extension, content_type)
            return True
        except Exception as e:
            print(f"❌ Failed to update job info: {e}")
            return False

    async def save_job_result(self, job_id: str, content_length: int,
                              format: str, metadata: Dict[str, Any]) -> bool:
        """Store extraction metadata (serialised to JSONB) for a finished job."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO job_results (job_id, content_length, format, metadata)
                    VALUES ($1, $2, $3, $4)
                """, job_id, content_length, format, json.dumps(metadata))
            return True
        except Exception as e:
            print(f"❌ Failed to save job result: {e}")
            return False

    async def log_proxy_usage(self, job_id: str, proxy_server: str,
                              success: bool, error_message: Optional[str] = None) -> bool:
        """Log proxy usage for a job"""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("""
                    INSERT INTO proxy_stats (job_id, proxy_server, success, error_message)
                    VALUES ($1, $2, $3, $4)
                """, job_id, proxy_server, success, error_message)
            return True
        except Exception as e:
            print(f"❌ Failed to log proxy usage: {e}")
            return False

    async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Return a job row as a dict, or None if missing/disabled."""
        if not self.pool:
            return None

        try:
            async with self.pool.acquire() as conn:
                row = await conn.fetchrow("""
                    SELECT * FROM jobs WHERE id = $1
                """, job_id)
                return dict(row) if row else None
        except Exception as e:
            print(f"❌ Failed to get job: {e}")
            return None

    async def get_all_jobs(self, limit: int = 50, offset: int = 0) -> List[Dict[str, Any]]:
        """Return jobs newest-first with LIMIT/OFFSET pagination."""
        if not self.pool:
            return []

        try:
            async with self.pool.acquire() as conn:
                rows = await conn.fetch("""
                    SELECT * FROM jobs
                    ORDER BY created_at DESC
                    LIMIT $1 OFFSET $2
                """, limit, offset)
                return [dict(row) for row in rows]
        except Exception as e:
            print(f"❌ Failed to get jobs: {e}")
            return []

    async def get_job_stats(self) -> Dict[str, Any]:
        """Return aggregate counts: total / completed / failed / running."""
        if not self.pool:
            return {}

        try:
            async with self.pool.acquire() as conn:
                stats = await conn.fetchrow("""
                    SELECT
                        COUNT(*) as total_jobs,
                        COUNT(*) FILTER (WHERE status = 'completed') as completed,
                        COUNT(*) FILTER (WHERE status = 'failed') as failed,
                        COUNT(*) FILTER (WHERE status = 'running') as running
                    FROM jobs
                """)
                return dict(stats) if stats else {}
        except Exception as e:
            print(f"❌ Failed to get stats: {e}")
            return {}

    async def delete_job(self, job_id: str) -> bool:
        """Delete a job; job_results rows cascade via the FK constraint."""
        if not self.pool:
            return False

        try:
            async with self.pool.acquire() as conn:
                await conn.execute("DELETE FROM jobs WHERE id = $1", job_id)
            return True
        except Exception as e:
            print(f"❌ Failed to delete job: {e}")
            return False

# Global database instance
db = Database()
backend/main.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio, json, os, uuid, shutil, base64
2
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, BackgroundTasks, UploadFile, Form
3
+ from fastapi.responses import FileResponse
4
+ from pydantic import BaseModel
5
+ from pathlib import Path
6
+ from backend.smart_browser_controller import SmartBrowserController # Updated import
7
+ from backend.proxy_manager import SmartProxyManager # Updated import
8
+ from backend.agent import run_agent
9
+ from backend.database import db # Database integration
10
+ from backend.telegram_bot import bot_notifier, start_bot # Telegram integration
11
+ from fastapi.staticfiles import StaticFiles
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+
14
# FastAPI application instance; routes are registered via decorators below.
app = FastAPI()


# Allow cross-origin requests so a separately-hosted frontend can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm intended behavior.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # TODO add specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
# In-memory job bookkeeping (process-local; lost on restart).
tasks = {}               # job_id → asyncio.Task running the agent
ws_subscribers = {}      # job_id → set of WebSocket subscribers
streaming_sessions = {}  # job_id → browser_controller
job_info = {}            # job_id → { format, content_type, extension, prompt }

# Initialize global smart proxy manager
smart_proxy_manager = SmartProxyManager()

# Directory where job output files are written; created on startup.
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)
35
+
36
class JobRequest(BaseModel):
    """Request body for POST /job."""
    # Natural-language goal handed to the agent.
    prompt: str
    format: str = "txt"  # txt | md | json | html | csv | pdf
    # Run the browser without a visible window.
    headless: bool = False
    # Expose a live-frame WebSocket for this job.
    enable_streaming: bool = False
41
+
42
async def store_job_info(job_id: str, info: dict):
    """Record per-job metadata in the in-memory `job_info` map.

    Process-local only — presumably read back by download/status endpoints.
    """
    job_info[job_id] = info
    print(f"📊 Stored job info for {job_id}: {info}")
46
+
47
@app.post("/job")
async def create_job(req: JobRequest):
    """Create a new scraping job.

    Validates the requested output format, selects the best available proxy,
    records the job in the database, notifies Telegram, and schedules the
    agent as a background task. Returns the job id, format, and proxy stats
    (plus streaming connection details when streaming is enabled).
    """
    # Fall back to plain text when an unknown format is requested.
    valid_formats = ["txt", "md", "json", "html", "csv", "pdf"]
    if req.format not in valid_formats:
        print(f"⚠️ Invalid format '{req.format}', defaulting to 'txt'")
        req.format = "txt"

    job_id = str(uuid.uuid4())

    # Use smart proxy manager to get the best available proxy.
    proxy_info = smart_proxy_manager.get_best_proxy()
    proxy = proxy_info.to_playwright_dict() if proxy_info else None
    proxy_server = proxy.get("server", "None") if proxy else "None"

    print(f"🚀 Creating smart job {job_id}")
    print(f"📋 Goal: {req.prompt}")
    print(f"🌐 Format: {req.format}")
    print(f"🖥️ Headless: {req.headless}")
    print(f"📡 Streaming: {req.enable_streaming}")
    print(f"🔄 Selected proxy: {proxy_server}")

    # Get initial proxy stats for the response payload.
    proxy_stats = smart_proxy_manager.get_proxy_stats()
    print(f"📊 Proxy pool stats: {proxy_stats}")

    # Persist the job before starting it so its status is queryable at once.
    await db.create_job(
        job_id=job_id,
        prompt=req.prompt,
        format=req.format,
        headless=req.headless,
        streaming_enabled=req.enable_streaming,
        proxy_server=proxy_server
    )

    # Send Telegram notification (fire-and-forget).
    asyncio.create_task(bot_notifier.notify_job_started(job_id, req.prompt, req.format))

    # Create the agent task.
    coro = run_agent(job_id, req.prompt, req.format, req.headless, proxy, req.enable_streaming)
    task = asyncio.create_task(coro)

    def on_task_done(fut):
        # BUGFIX: remove the finished task from the registry so `tasks` does
        # not grow without bound, and don't report a *cancelled* job as failed
        # (fut.result() re-raises CancelledError, which the old code turned
        # into a failure notification).
        tasks.pop(job_id, None)
        if fut.cancelled():
            return
        exc = fut.exception()
        if exc is None:
            # Job completed successfully.
            download_url = f"/download/{job_id}"
            asyncio.create_task(bot_notifier.notify_job_completed(job_id, req.format, download_url))
        else:
            # Job failed.
            asyncio.create_task(bot_notifier.notify_job_failed(job_id, str(exc)))

    task.add_done_callback(on_task_done)
    tasks[job_id] = task

    response = {
        "job_id": job_id,
        "format": req.format,
        "proxy_stats": proxy_stats
    }

    if req.enable_streaming:
        response["streaming_enabled"] = True
        response["stream_url"] = f"ws://localhost:8000/stream/{job_id}"

    return response
115
+
116
@app.websocket("/ws/{job_id}")
async def job_ws(ws: WebSocket, job_id: str):
    """Per-job status WebSocket: pushes streaming info and proxy-pool stats."""
    await ws.accept()
    subscribers = ws_subscribers.setdefault(job_id, set())
    subscribers.add(ws)

    # If this job already has a live browser stream, tell the client how to attach.
    browser_ctrl = streaming_sessions.get(job_id)
    if browser_ctrl is not None:
        await ws.send_text(json.dumps({
            "type": "streaming_info",
            "streaming": browser_ctrl.get_streaming_info()
        }))

    # Initial snapshot of the proxy pool.
    await ws.send_text(json.dumps({
        "type": "proxy_stats",
        "stats": smart_proxy_manager.get_proxy_stats()
    }))

    try:
        # Drain incoming messages just to keep the connection alive.
        while True:
            await ws.receive_text()
    except WebSocketDisconnect:
        subscribers.discard(ws)
142
+
143
@app.websocket("/stream/{job_id}")
async def stream_ws(websocket: WebSocket, job_id: str):
    """WebSocket endpoint for real-time browser streaming.

    Waits (up to 30s) for the job's streaming session to appear, registers the
    client with the browser controller, then relays mouse/keyboard events from
    the client to the browser and answers pings.
    """
    await websocket.accept()

    # Wait for streaming session to be available (with timeout).
    max_wait = 30  # seconds
    wait_time = 0
    while job_id not in streaming_sessions and wait_time < max_wait:
        await asyncio.sleep(0.5)
        wait_time += 0.5

    if job_id not in streaming_sessions:
        await websocket.send_text(json.dumps({
            "type": "error",
            "message": "Streaming session not available - job may not have streaming enabled"
        }))
        await websocket.close()
        return

    browser_ctrl = streaming_sessions[job_id]
    browser_ctrl.add_stream_client(websocket)

    # Send initial connection confirmation.
    await websocket.send_text(json.dumps({
        "type": "connected",
        "message": "Connected to browser stream",
        "streaming_active": browser_ctrl.streaming_active
    }))

    try:
        while True:
            try:
                # BUGFIX: websocket.receive_text() never raises
                # asyncio.TimeoutError on its own, so the keepalive branch
                # below was dead code. Bound the wait explicitly so idle
                # clients get pinged every 30 seconds.
                message = await asyncio.wait_for(websocket.receive_text(), timeout=30)
                data = json.loads(message)

                if data['type'] == 'mouse':
                    await browser_ctrl.handle_mouse_event(data)
                elif data['type'] == 'keyboard':
                    await browser_ctrl.handle_keyboard_event(data)
                elif data['type'] == 'ping':
                    await websocket.send_text(json.dumps({"type": "pong"}))

            except asyncio.TimeoutError:
                # Idle connection: send a ping to keep it alive.
                await websocket.send_text(json.dumps({"type": "ping"}))

    except WebSocketDisconnect:
        browser_ctrl.remove_stream_client(websocket)
        print(f"Stream client disconnected from job {job_id}")
    except Exception as e:
        print(f"Error in stream WebSocket: {e}")
        browser_ctrl.remove_stream_client(websocket)
195
+
196
@app.post("/streaming/create/{job_id}")
async def create_streaming_session(job_id: str):
    """Create a streaming session without starting a job"""
    existing = streaming_sessions.get(job_id)
    if existing is not None:
        # Session already exists; just report its connection details.
        return existing.get_streaming_info()

    try:
        # Pick the healthiest proxy for the new browser.
        proxy_info = smart_proxy_manager.get_best_proxy()
        proxy = proxy_info.to_playwright_dict() if proxy_info else None

        print(f"🎥 Creating streaming session with proxy: {proxy.get('server', 'None') if proxy else 'None'}")

        # Launch a streaming-enabled browser and start pushing frames.
        browser_ctrl = SmartBrowserController(headless=False, proxy=proxy, enable_streaming=True)
        await browser_ctrl.__aenter__()
        await browser_ctrl.start_streaming(quality=80)
        streaming_sessions[job_id] = browser_ctrl

        stream_info = browser_ctrl.get_streaming_info()

        # Attach proxy details so clients can display pool health.
        stream_info["proxy_info"] = {
            "current_proxy": proxy.get("server", "None") if proxy else "None",
            "proxy_stats": smart_proxy_manager.get_proxy_stats()
        }

        # Let any listeners on the job channel know the stream exists.
        await broadcast(job_id, {
            "type": "streaming_info",
            "streaming": stream_info
        })

        return stream_info

    except Exception as e:
        print(f"❌ Failed to create streaming session: {e}")
        return {"enabled": False, "error": str(e)}
235
+
236
@app.get("/streaming/{job_id}")
async def get_streaming_info(job_id: str):
    """Get streaming connection information for a job"""
    browser_ctrl = streaming_sessions.get(job_id)
    if browser_ctrl is None:
        return {"enabled": False, "error": "Streaming not enabled for this job"}

    info = browser_ctrl.get_streaming_info()
    # Include a live snapshot of the proxy pool.
    info["proxy_stats"] = smart_proxy_manager.get_proxy_stats()
    return info
249
+
250
@app.delete("/streaming/{job_id}")
async def cleanup_streaming(job_id: str):
    """Clean up streaming session for a job"""
    if job_id not in streaming_sessions:
        return {"message": "No streaming session found"}

    browser_ctrl = streaming_sessions[job_id]
    try:
        # Close the browser via its async context-manager exit.
        await browser_ctrl.__aexit__(None, None, None)
    except Exception as e:
        print(f"Error cleaning up streaming session: {e}")
    finally:
        # Drop the registry entry even if teardown raised.
        del streaming_sessions[job_id]
    return {"message": "Streaming session cleaned up"}
263
+
264
@app.get("/download/{job_id}")
def download(job_id: str):
    """Serve a job's output file for download.

    Looks up the job's recorded extension/content-type; if the file is not at
    the expected path, probes common extensions before giving up with a 404.
    """
    print(f"📥 Download request for job {job_id}")

    # Get job information recorded when the job ran.
    if job_id in job_info:
        info = job_info[job_id]
        extension = info.get("extension", "output")
        content_type = info.get("content_type", "application/octet-stream")
        format_name = info.get("format", "unknown")

        print(f"📋 Job info found: {info}")
    else:
        # Fallback for jobs without stored info (e.g. after a restart).
        extension = "output"
        content_type = "application/octet-stream"
        format_name = "unknown"
        print(f"⚠️ No job info found for {job_id}, using fallback")

    # Try to find the file with the proper extension first.
    file_path = OUTPUT_DIR / f"{job_id}.{extension}"

    if not file_path.exists():
        # Fallback: try common extensions.
        for fallback_ext in ['txt', 'pdf', 'csv', 'json', 'html', 'md', 'output']:
            fallback_path = OUTPUT_DIR / f"{job_id}.{fallback_ext}"
            if fallback_path.exists():
                file_path = fallback_path
                extension = fallback_ext
                print(f"📁 Found file with fallback extension: {file_path}")
                break

    if not file_path.exists():
        print(f"❌ File not found: {file_path}")
        from fastapi import HTTPException
        raise HTTPException(status_code=404, detail="File not found")

    # Generate an appropriate filename for the client.
    safe_filename = f"extracted_data_{job_id}.{extension}"

    print(f"✅ Serving file: {file_path}")
    print(f"📄 Content-Type: {content_type}")
    print(f"📎 Filename: {safe_filename}")

    # Serve file with proper content type and filename.
    return FileResponse(
        path=file_path,
        filename=safe_filename,
        media_type=content_type,
        headers={
            # BUGFIX: quote the filename per RFC 6266; an unquoted value is
            # ambiguous to some clients.
            "Content-Disposition": f'attachment; filename="{safe_filename}"',
            "X-File-Format": format_name,
            "X-Original-Extension": extension
        }
    )
320
+
321
@app.get("/job/{job_id}/info")
def get_job_info(job_id: str):
    """Get job information including format and status"""
    stored = job_info.get(job_id)
    if stored is None:
        return {"error": "Job not found", "job_id": job_id}

    info = stored.copy()

    # Report whether the output file has been produced yet.
    output_file = OUTPUT_DIR / f"{job_id}.{info.get('extension', 'output')}"
    info["file_exists"] = output_file.exists()
    info["file_path"] = str(output_file) if output_file.exists() else None

    # Live proxy-pool snapshot.
    info["proxy_stats"] = smart_proxy_manager.get_proxy_stats()

    return info
339
+
340
@app.get("/proxy/stats")
def get_proxy_stats():
    """Get current proxy pool statistics plus a wall-clock timestamp."""
    import time  # local import keeps the fix self-contained

    stats = smart_proxy_manager.get_proxy_stats()
    return {
        "proxy_stats": stats,
        # BUGFIX: this sync endpoint runs in a worker thread where
        # asyncio.get_event_loop() has no running loop (RuntimeError /
        # DeprecationWarning on modern Python); use the wall clock instead.
        "timestamp": time.time()
    }
348
+
349
@app.post("/proxy/reload")
def reload_proxies():
    """Reload proxy list from environment"""
    global smart_proxy_manager
    try:
        # Rebuild the manager so it re-reads the proxy configuration.
        smart_proxy_manager = SmartProxyManager()
        return {
            "success": True,
            "message": "Proxy list reloaded successfully",
            "proxy_stats": smart_proxy_manager.get_proxy_stats()
        }
    except Exception as e:
        return {
            "success": False,
            "message": f"Failed to reload proxies: {str(e)}"
        }
366
+
367
@app.get("/jobs")
async def get_all_jobs(limit: int = 50, offset: int = 0):
    """Get all jobs from database with pagination"""
    page = await db.get_all_jobs(limit, offset)
    summary = await db.get_job_stats()
    return {
        "jobs": page,
        "stats": summary,
        "pagination": {
            "limit": limit,
            "offset": offset
        }
    }
380
+
381
@app.get("/job/{job_id}")
async def get_job(job_id: str):
    """Get detailed job information from database"""
    job = await db.get_job(job_id)
    if not job:
        return {"error": "Job not found", "job_id": job_id}

    # Report whether the output file is present on disk.
    out_path = OUTPUT_DIR / f"{job_id}.{job.get('file_extension', 'output')}"
    present = out_path.exists()
    job["file_exists"] = present
    job["file_path"] = str(out_path) if present else None
    return {"job": job}
394
+
395
@app.delete("/job/{job_id}")
async def delete_job(job_id: str):
    """Delete a job from the database and remove its output file."""
    # BUGFIX: fetch the job record BEFORE deleting it. The original deleted
    # the row first and then queried it to learn the file extension, so the
    # lookup always came back empty and the output file was never removed.
    job = await db.get_job(job_id)

    # Delete from database.
    success = await db.delete_job(job_id)

    # Also delete the output file if it exists.
    if job:
        extension = job.get("file_extension", "output")
        file_path = OUTPUT_DIR / f"{job_id}.{extension}"
        if file_path.exists():
            file_path.unlink()

    if success:
        return {"message": f"Job {job_id} deleted successfully"}
    else:
        return {"error": "Failed to delete job"}
413
+
414
@app.get("/stats")
async def get_system_stats():
    """Get overall system statistics from database"""
    return {
        "database": await db.get_job_stats(),
        "proxy": smart_proxy_manager.get_proxy_stats(),
        "active_jobs": len(tasks),
        "active_streams": len(streaming_sessions)
    }
426
+
427
+ app.mount("/", StaticFiles(directory="frontend", html=True), name="static")
428
+
429
+ # Helper functions
430
async def broadcast(job_id: str, msg: dict):
    """Broadcast *msg* (JSON-encoded) to every WebSocket subscribed to *job_id*.

    Sockets that fail to send are treated as dead and dropped from the set.
    """
    subscribers = ws_subscribers.get(job_id)
    if not subscribers:
        return
    payload = json.dumps(msg)
    # Iterate over a copy: we may mutate the set while looping.
    for ws in list(subscribers):
        try:
            await ws.send_text(payload)
        except Exception:
            # BUGFIX: the original bare `except:` also swallowed
            # CancelledError/KeyboardInterrupt; only ordinary send failures
            # should mark a client as dead.
            subscribers.discard(ws)
438
+
439
async def register_streaming_session(job_id: str, browser_ctrl):
    """Register streaming session information"""
    streaming_sessions[job_id] = browser_ctrl

    # Start pushing frames only when the controller was built with streaming on.
    if browser_ctrl.enable_streaming:
        await browser_ctrl.start_streaming(quality=80)

    # Announce the stream to any connected status clients.
    await broadcast(job_id, {
        "type": "streaming_info",
        "streaming": browser_ctrl.get_streaming_info()
    })
451
+
452
+ # Cleanup on shutdown
453
@app.on_event("shutdown")
async def cleanup():
    """Cleanup resources on shutdown"""
    print("🧹 Cleaning up resources...")

    # Close every live browser/streaming session.
    for job_id, browser_ctrl in streaming_sessions.items():
        try:
            await browser_ctrl.__aexit__(None, None, None)
        except Exception as e:
            print(f"❌ Error cleaning up session {job_id}: {e}")
        else:
            print(f"✅ Cleaned up streaming session: {job_id}")

    streaming_sessions.clear()
    job_info.clear()

    # Release the database connection.
    await db.disconnect()

    # Print final proxy stats.
    final_stats = smart_proxy_manager.get_proxy_stats()
    print(f"📊 Final proxy stats: {final_stats}")

    print("✅ Cleanup completed")
477
+
478
@app.on_event("startup")
async def startup():
    """Initialize database connection and Telegram bot on startup"""
    print("🚀 Starting up BrowserPilot...")
    await db.connect()

    # Run the Telegram bot alongside the API without blocking startup.
    asyncio.create_task(start_bot())

    print("✅ Startup completed")
backend/proxy_manager.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, random, time, asyncio, logging
2
+ from typing import Dict, List, Optional, Tuple
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ import base64
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class ProxyHealth(Enum):
    """Lifecycle states of a proxy in the rotation pool."""
    HEALTHY = "healthy"
    DEGRADED = "degraded"
    BLOCKED = "blocked"
    FAILED = "failed"


@dataclass
class ProxyInfo:
    """Runtime record for a single proxy server and its performance metrics."""
    server: str
    username: Optional[str] = None
    password: Optional[str] = None
    location: str = "unknown"
    health: ProxyHealth = ProxyHealth.HEALTHY
    success_count: int = 0
    failure_count: int = 0
    last_used: float = 0
    blocked_sites: set = None  # created lazily in __post_init__ (mutable default)
    response_time: float = 0
    consecutive_failures: int = 0

    def __post_init__(self):
        # A mutable default cannot go in the field declaration; build the
        # per-instance set here instead.
        if self.blocked_sites is None:
            self.blocked_sites = set()

    @property
    def success_rate(self) -> float:
        """Fraction of successful uses; optimistically 1.0 when untried."""
        attempts = self.success_count + self.failure_count
        if attempts == 0:
            return 1.0
        return self.success_count / attempts

    def to_playwright_dict(self) -> Dict:
        """Render this proxy in the dict shape Playwright's launch() expects."""
        result = {"server": self.server}
        if self.username:
            result["username"] = self.username
        if self.password:
            result["password"] = self.password
        return result
45
+
46
class SmartProxyManager:
    """Proxy pool manager.

    Loads proxies from the SCRAPER_PROXIES environment variable, scores them
    by past success rate and speed, and (optionally) uses a vision model to
    recognize anti-bot walls so callers know when to rotate.
    """

    def __init__(self, vision_model=None):
        # Known proxies with their health/performance records.
        self.proxies: List[ProxyInfo] = []
        self.current_proxy_index = 0
        # Optional vision model used by detect_anti_bot_with_vision().
        self.vision_model = vision_model
        self.max_proxy_retries = 5
        # After this many consecutive failures a proxy is considered FAILED.
        self.max_consecutive_failures = 3

        self._load_proxies()

    def _load_proxies(self):
        """Load proxies from environment or config"""
        source = os.getenv("SCRAPER_PROXIES", "[]")
        # BUGFIX: malformed SCRAPER_PROXIES JSON used to raise during
        # construction and abort startup; degrade to an empty pool instead.
        try:
            proxy_data = json.loads(source)
        except json.JSONDecodeError as exc:
            logger.error(f"Invalid SCRAPER_PROXIES JSON ({exc}); no proxies loaded")
            proxy_data = []

        for proxy in proxy_data:
            if isinstance(proxy, str):
                # Bare "scheme://host:port" entry.
                self.proxies.append(ProxyInfo(server=proxy))
            elif isinstance(proxy, dict):
                self.proxies.append(ProxyInfo(
                    server=proxy.get("server", ""),
                    username=proxy.get("username"),
                    password=proxy.get("password"),
                    location=proxy.get("location", "unknown")
                ))

        logger.info(f"Loaded {len(self.proxies)} proxies for smart rotation")

    def get_best_proxy(self, exclude_blocked_for: str = None) -> Optional[ProxyInfo]:
        """Get the best available proxy based on performance metrics"""
        if not self.proxies:
            return None

        # Filter out failed and heavily blocked proxies.
        available_proxies = [
            p for p in self.proxies
            if p.health != ProxyHealth.FAILED and
            p.consecutive_failures < self.max_consecutive_failures and
            (not exclude_blocked_for or exclude_blocked_for not in p.blocked_sites)
        ]

        if not available_proxies:
            # Second chance: forgive consecutive failures and retry the filter.
            for proxy in self.proxies:
                proxy.consecutive_failures = 0
            available_proxies = [p for p in self.proxies if p.health != ProxyHealth.FAILED]

            if not available_proxies:
                logger.error("No available proxies found!")
                return None

        # Best first: highest success rate, then fastest, then least recently used.
        sorted_proxies = sorted(
            available_proxies,
            key=lambda p: (p.success_rate, -p.response_time, -p.last_used),
            reverse=True
        )

        return sorted_proxies[0]

    async def detect_anti_bot_with_vision(self, page, goal: str) -> Tuple[bool, str, Optional[str]]:
        """Use the vision model to detect anti-bot systems on the current page.

        Returns (is_anti_bot, detection_type, suggested_action); all-empty when
        no vision model is configured or analysis fails.
        """
        if not self.vision_model:
            return False, "", None

        try:
            # Take screenshot for vision analysis.
            screenshot_bytes = await page.screenshot(type='png')
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Get page content for context.
            page_title = await page.title()
            page_url = page.url

            # Create anti-bot detection prompt.
            detection_prompt = f"""
            ANTI-BOT DETECTION TASK:

            You are analyzing a webpage screenshot to detect if we've encountered an anti-bot system, CAPTCHA, or access restriction.

            Current URL: {page_url}
            Page Title: {page_title}
            Original Goal: {goal}

            Look for these indicators:
            1. **Cloudflare protection pages** - "Checking your browser", "Please wait", security checks
            2. **CAPTCHA challenges** - Image puzzles, reCAPTCHA, hCaptcha, text verification
            3. **Access denied pages** - "Access Denied", "Blocked", "Rate Limited"
            4. **Bot detection warnings** - "Automated traffic detected", "Unusual activity"
            5. **Verification pages** - Phone verification, email verification, identity checks
            6. **Error pages** - 403 Forbidden, 429 Rate Limited, 503 Service Unavailable
            7. **Loading/waiting pages** - Indefinite loading, "Please wait while we verify"

            Respond with JSON:
            {{
                "is_anti_bot": true/false,
                "detection_type": "cloudflare|captcha|access_denied|rate_limit|verification|error|none",
                "confidence": 0.0-1.0,
                "description": "Brief description of what you see",
                "can_solve": true/false,
                "suggested_action": "rotate_proxy|solve_captcha|wait|retry|abort"
            }}
            """

            # Use vision model to analyze.
            result = await self.vision_model.analyze_anti_bot_page(
                screenshot_b64, detection_prompt, page_url
            )

            if result.get("is_anti_bot", False):
                detection_type = result.get("detection_type", "unknown")
                suggested_action = result.get("suggested_action", "rotate_proxy")
                description = result.get("description", "Anti-bot system detected")

                logger.warning(f"🚫 Anti-bot detected: {detection_type} - {description}")
                return True, detection_type, suggested_action

            return False, "", None

        except Exception as e:
            # Best-effort detection: analysis failure must not break navigation.
            logger.error(f"Error in vision-based anti-bot detection: {e}")
            return False, "", None

    def mark_proxy_success(self, proxy: ProxyInfo, response_time: float = 0):
        """Mark proxy as successful"""
        proxy.success_count += 1
        proxy.consecutive_failures = 0
        proxy.last_used = time.time()
        proxy.response_time = response_time
        proxy.health = ProxyHealth.HEALTHY
        logger.debug(f"✅ Proxy {proxy.server} marked successful")

    def mark_proxy_failure(self, proxy: ProxyInfo, site_url: str = None, detection_type: str = None):
        """Mark proxy as failed"""
        proxy.failure_count += 1
        proxy.consecutive_failures += 1

        if detection_type in ["cloudflare", "rate_limit"]:
            # BUGFIX: only record the site when we actually know it; the old
            # code could add None to blocked_sites.
            if site_url:
                proxy.blocked_sites.add(site_url)
            proxy.health = ProxyHealth.BLOCKED
            logger.warning(f"🚫 Proxy {proxy.server} blocked by {detection_type} for {site_url}")
        else:
            proxy.health = ProxyHealth.DEGRADED

        # Mark as completely failed if too many consecutive failures.
        if proxy.consecutive_failures >= self.max_consecutive_failures:
            proxy.health = ProxyHealth.FAILED
            logger.error(f"❌ Proxy {proxy.server} marked as failed after {proxy.consecutive_failures} consecutive failures")

    def get_proxy_stats(self) -> Dict:
        """Get comprehensive proxy statistics"""
        if not self.proxies:
            return {"total": 0, "healthy": 0, "blocked": 0, "failed": 0, "available": 0}

        stats = {
            "total": len(self.proxies),
            "healthy": len([p for p in self.proxies if p.health == ProxyHealth.HEALTHY]),
            "degraded": len([p for p in self.proxies if p.health == ProxyHealth.DEGRADED]),
            "blocked": len([p for p in self.proxies if p.health == ProxyHealth.BLOCKED]),
            "failed": len([p for p in self.proxies if p.health == ProxyHealth.FAILED]),
            "available": len([p for p in self.proxies if p.health != ProxyHealth.FAILED and p.consecutive_failures < self.max_consecutive_failures])
        }
        return stats
backend/smart_browser_controller.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Manages browser navigation with intelligent anti-bot detection and proxy rotation.
2
+
3
+ import asyncio
4
+ import time
5
+ from urllib.parse import urlparse
6
+ from backend.browser_controller import BrowserController
7
+ from backend.proxy_manager import SmartProxyManager
8
+ from backend.anti_bot_detection import AntiBotVisionModel
9
+ import logging
10
+ import base64
11
+ logger = logging.getLogger(__name__)
12
+
13
class SmartBrowserController(BrowserController):
    """BrowserController with anti-bot awareness.

    Each navigation is screened by a vision model; on detection the controller
    either attempts to solve a CAPTCHA in place or rotates to a fresh proxy
    (restarting the browser) before retrying.
    """

    def __init__(self, headless: bool, proxy: dict | None, enable_streaming: bool = False):
        super().__init__(headless, proxy, enable_streaming)

        # Initialize smart proxy management
        self.vision_model = AntiBotVisionModel()
        self.proxy_manager = SmartProxyManager(self.vision_model)
        self.current_proxy = proxy
        self.max_proxy_retries = 5  # navigation attempts before giving up
        self.proxy_retry_count = 0
        self.max_captcha_solve_attempts = 3  # CAPTCHA tries before rotating away
        self.captcha_solve_count = 0

    async def smart_navigate(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000) -> bool:
        """Navigate with intelligent anti-bot detection and proxy rotation.

        Returns True once the page loads without an anti-bot wall, False when
        all retries are exhausted or the wall is judged unresolvable.
        """
        site_domain = urlparse(url).netloc

        for attempt in range(self.max_proxy_retries):
            try:
                logger.info(f"🌐 Smart navigation attempt {attempt + 1}/{self.max_proxy_retries} to: {url}")
                start_time = time.time()

                # Navigate to the page
                response = await self.page.goto(url, wait_until=wait_until, timeout=timeout)
                response_time = time.time() - start_time

                # Wait a moment for page to fully load
                await asyncio.sleep(2)

                # Use vision model to detect anti-bot systems
                is_antibot, detection_type, suggested_action = await self.proxy_manager.detect_anti_bot_with_vision(
                    self.page, f"navigate to {url}"
                )

                if is_antibot:
                    logger.warning(f"🚫 Anti-bot detected: {detection_type}, suggested action: {suggested_action}")

                    # Handle based on suggested action
                    if suggested_action == "solve_captcha" and self.captcha_solve_count < self.max_captcha_solve_attempts:
                        success = await self._attempt_captcha_solve(url, detection_type)
                        if success:
                            logger.info("✅ CAPTCHA solved successfully!")
                            # Credit the current proxy for the successful visit.
                            if self.current_proxy:
                                proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                                if proxy_info:
                                    self.proxy_manager.mark_proxy_success(proxy_info, response_time)
                            return True
                        else:
                            self.captcha_solve_count += 1

                    # Rotate away either on explicit advice or after too many
                    # failed CAPTCHA attempts.
                    if suggested_action in ["rotate_proxy", "retry"] or self.captcha_solve_count >= self.max_captcha_solve_attempts:
                        # Mark current proxy as failed
                        if self.current_proxy:
                            proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                            if proxy_info:
                                self.proxy_manager.mark_proxy_failure(proxy_info, site_domain, detection_type)

                        # Try with new proxy
                        if attempt < self.max_proxy_retries - 1:
                            new_proxy_info = self.proxy_manager.get_best_proxy(exclude_blocked_for=site_domain)
                            if new_proxy_info:
                                new_proxy = new_proxy_info.to_playwright_dict()
                                logger.info(f"🔄 Rotating to new proxy: {new_proxy['server']}")
                                await self._restart_browser_with_proxy(new_proxy)
                                await asyncio.sleep(3)  # Wait before retry
                                continue
                            else:
                                logger.error("❌ No available proxies for rotation")
                                return False

                    if suggested_action == "abort":
                        logger.error(f"❌ Aborting navigation due to unresolvable anti-bot: {detection_type}")
                        return False

                else:
                    # Success! No anti-bot detected
                    logger.info(f"✅ Successfully navigated to: {url}")
                    if self.current_proxy:
                        proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                        if proxy_info:
                            self.proxy_manager.mark_proxy_success(proxy_info, response_time)
                    # Reset per-navigation counters for the next call.
                    self.proxy_retry_count = 0
                    self.captcha_solve_count = 0
                    return True

            except Exception as e:
                logger.error(f"❌ Navigation failed on attempt {attempt + 1}: {e}")

                # Mark proxy failure and try another
                if self.current_proxy:
                    proxy_info = next((p for p in self.proxy_manager.proxies if p.to_playwright_dict() == self.current_proxy), None)
                    if proxy_info:
                        self.proxy_manager.mark_proxy_failure(proxy_info, site_domain, "connection_error")

                if attempt < self.max_proxy_retries - 1:
                    new_proxy_info = self.proxy_manager.get_best_proxy(exclude_blocked_for=site_domain)
                    if new_proxy_info:
                        new_proxy = new_proxy_info.to_playwright_dict()
                        logger.info(f"🔄 Retrying with new proxy due to connection error")
                        await self._restart_browser_with_proxy(new_proxy)
                        await asyncio.sleep(3)
                        continue

        logger.error(f"❌ Failed to navigate to {url} after all retries")
        return False

    async def _attempt_captcha_solve(self, url: str, detection_type: str) -> bool:
        """Attempt to solve a CAPTCHA using the vision model.

        Returns True when a high-confidence solution was found and applied.
        """
        try:
            logger.info(f"🧩 Attempting to solve {detection_type} CAPTCHA...")

            # Take screenshot for CAPTCHA analysis
            screenshot_bytes = await self.page.screenshot(type='png')
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Use vision model to solve CAPTCHA
            solution = await self.vision_model.solve_captcha(screenshot_b64, url, detection_type)

            # Only act on confident solutions (> 0.7).
            if solution.get("can_solve", False) and solution.get("confidence", 0) > 0.7:
                logger.info(f"🎯 CAPTCHA solution found: {solution.get('solution', 'N/A')}")

                # Implement CAPTCHA solving logic based on solution type
                success = await self._apply_captcha_solution(solution)
                return success
            else:
                logger.warning(f"❌ Could not solve CAPTCHA: {solution.get('instructions', 'Unknown reason')}")
                return False

        except Exception as e:
            logger.error(f"❌ Error attempting CAPTCHA solve: {e}")
            return False

    async def _apply_captcha_solution(self, solution: dict) -> bool:
        """Apply the CAPTCHA solution to the page.

        Supports text and math answers (typed into the first visible text
        input, then submitted); image-selection CAPTCHAs are not implemented.
        """
        try:
            solution_type = solution.get("solution_type", "unknown")
            solution_value = solution.get("solution", "")

            if solution_type == "text":
                # Find text input and enter solution
                text_inputs = await self.page.query_selector_all('input[type="text"], input:not([type])')
                for input_elem in text_inputs:
                    if await input_elem.is_visible():
                        await input_elem.fill(solution_value)
                        await asyncio.sleep(1)

                        # Look for submit button
                        submit_buttons = await self.page.query_selector_all('button, input[type="submit"]')
                        for button in submit_buttons:
                            if await button.is_visible():
                                await button.click()
                                await asyncio.sleep(3)
                                return True

            elif solution_type == "selection":
                # Handle image selection CAPTCHAs
                logger.warning("🚧 Image selection CAPTCHA solving not fully implemented")
                return False

            elif solution_type == "math":
                # Similar to text but specifically for math solutions
                text_inputs = await self.page.query_selector_all('input[type="text"], input:not([type])')
                for input_elem in text_inputs:
                    if await input_elem.is_visible():
                        await input_elem.fill(str(solution_value))
                        await asyncio.sleep(1)

                        submit_buttons = await self.page.query_selector_all('button, input[type="submit"]')
                        for button in submit_buttons:
                            if await button.is_visible():
                                await button.click()
                                await asyncio.sleep(3)
                                return True

            return False

        except Exception as e:
            logger.error(f"❌ Error applying CAPTCHA solution: {e}")
            return False

    async def _restart_browser_with_proxy(self, new_proxy: dict):
        """Restart the browser with a new proxy.

        Closes the current browser, relaunches Chromium with the given proxy,
        and re-establishes the page, CDP streaming (when enabled), and headers.
        Raises on failure so the caller's retry loop can react.
        """
        try:
            # Close current browser
            if self.browser:
                await self.browser.close()

            # Update proxy
            self.current_proxy = new_proxy

            # Launch new browser with new proxy
            launch_options = {
                "headless": self.headless,
                "args": [
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                    "--disable-web-security",
                    "--disable-features=VizDisplayCompositor",
                    "--window-size=1280,800",
                    "--window-position=0,0",
                    "--disable-blink-features=AutomationControlled",
                    "--disable-extensions",
                    "--no-first-run",
                    "--disable-default-apps",
                    "--remote-debugging-port=0"
                ]
            }

            if new_proxy:
                launch_options["proxy"] = new_proxy

            self.browser = await self.play.chromium.launch(**launch_options)
            self.page = await self.browser.new_page(viewport={"width": 1280, "height": 800})

            # Re-setup CDP streaming if enabled
            if self.enable_streaming:
                await self._setup_cdp_streaming()

            # Set headers with randomization
            # NOTE(review): the User-Agent below is a fixed string, not
            # actually randomized — confirm whether randomization is intended.
            await self.page.set_extra_http_headers({
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            })

            logger.info("✅ Browser restarted with new proxy")

        except Exception as e:
            logger.error(f"❌ Failed to restart browser with new proxy: {e}")
            raise

    def get_proxy_stats(self) -> dict:
        """Get current proxy statistics, including this controller's counters."""
        stats = self.proxy_manager.get_proxy_stats()
        stats.update({
            "current_proxy": self.current_proxy.get("server", "None") if self.current_proxy else "None",
            "retry_count": self.proxy_retry_count,
            "captcha_solve_count": self.captcha_solve_count
        })
        return stats

    # Override the goto method to use smart navigation
    async def goto(self, url: str, wait_until: str = "domcontentloaded", timeout: int = 30000):
        """Navigate to a URL with smart anti-bot detection; raises on failure."""
        success = await self.smart_navigate(url, wait_until, timeout)
        if not success:
            raise Exception(f"Failed to navigate to {url} after intelligent retry attempts")
backend/telegram_bot.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Telegram Bot Integration for BrowserPilot
3
+ - Job completion notifications
4
+ - Remote control commands
5
+ - Keepalive alerts
6
+ """
7
+ import os
8
+ import asyncio
9
+ from typing import Optional
10
+ from telegram import Bot, Update
11
+ from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
12
+
13
class TelegramNotifier:
    """Pushes BrowserPilot job/keepalive notifications to one Telegram chat.

    Reads TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID from the environment.
    If either is missing, or initialization fails, the notifier stays
    disabled and every notify_*/send_message call is a silent no-op.
    """

    def __init__(self):
        # Credentials come from the environment; None when unset.
        self.token = os.getenv("TELEGRAM_BOT_TOKEN")
        self.chat_id = os.getenv("TELEGRAM_CHAT_ID")
        self.bot: Optional[Bot] = None
        # NOTE(review): self.app is never assigned anywhere else in this
        # file — possibly vestigial; confirm before removing.
        self.app = None
        self._initialized = False

    async def initialize(self):
        """Create the Bot and validate the token with a get_me() round-trip."""
        if not self.token or not self.chat_id:
            print("⚠️ Telegram not configured (missing TOKEN or CHAT_ID)")
            return

        try:
            self.bot = Bot(token=self.token)
            # get_me() fails fast on a bad token before any message is sent.
            await self.bot.get_me()
            self._initialized = True
            print(f"✅ Telegram bot initialized: @{self.bot.username}")
        except Exception as e:
            print(f"❌ Telegram init failed: {e}")
            self._initialized = False

    async def send_message(self, message: str, parse_mode: str = "HTML"):
        """Send *message* to the configured chat; no-op when uninitialized."""
        if not self._initialized:
            return

        try:
            await self.bot.send_message(
                chat_id=self.chat_id,
                text=message,
                parse_mode=parse_mode
            )
        except Exception as e:
            # Notification failures must never break the calling job flow.
            print(f"❌ Failed to send Telegram message: {e}")

    async def notify_job_started(self, job_id: str, prompt: str, format: str):
        """Notify that job *job_id* started (prompt truncated to 200 chars)."""
        message = (
            "🚀 <b>Job Started</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Task:</b> {prompt[:200]}\n"
            f"<b>Format:</b> {format}\n\n"
            "⏳ Processing..."
        )
        await self.send_message(message)

    async def notify_job_completed(self, job_id: str, format: str, download_url: str):
        """Notify that a job finished, linking to the downloadable result."""
        message = (
            "✅ <b>Job Completed!</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Format:</b> {format}\n\n"
            f"📥 <a href='{download_url}'>Download Result</a>"
        )
        await self.send_message(message)

    async def notify_job_failed(self, job_id: str, error: str):
        """Notify that a job failed (error text truncated to 500 chars)."""
        message = (
            "❌ <b>Job Failed</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Error:</b> {error[:500]}"
        )
        await self.send_message(message)

    async def notify_keepalive_failed(self, status_code: int):
        """Alert that the HF Space health check returned *status_code*."""
        message = (
            "⚠️ <b>KeepAlive Alert</b>\n\n"
            "🔴 HF Space health check failed!\n"
            f"<b>Status:</b> {status_code}\n\n"
            "The Space might be sleeping or down."
        )
        await self.send_message(message)

    async def notify_keepalive_restored(self):
        """Announce that the health check passes again after a failure."""
        message = (
            "✅ <b>KeepAlive Restored</b>\n\n"
            "🟢 HF Space is back online!\n\n"
            "Health check passed."
        )
        await self.send_message(message)
98
+
99
# Command handlers for bot control
async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /start: reply with the command overview.

    Bug fix: the reply contains HTML tags (<b>...</b>) but was sent without
    parse_mode, so Telegram showed the literal markup to the user;
    parse_mode="HTML" renders it as formatting.
    """
    await update.message.reply_text(
        "🤖 <b>BrowserPilot Bot</b>\n\n"
        "Commands:\n"
        "/start - Show this help\n"
        "/status - Check system status\n"
        "/jobs - List recent jobs\n"
        "/ping - Check if bot is alive\n\n"
        "To create a job, send a message with your task.",
        parse_mode="HTML",
    )
111
+
112
async def ping_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /ping: simple liveness check."""
    reply = "🟢 Bot is alive!"
    await update.message.reply_text(reply)
115
+
116
async def status_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /status: report live job/stream/proxy counts.

    Bug fix: the message uses HTML tags, so it must be sent with
    parse_mode="HTML"; previously the raw tags were shown to the user.
    """
    # Imported lazily to avoid a circular import with backend.main.
    from backend.main import smart_proxy_manager, tasks, streaming_sessions

    proxy_stats = smart_proxy_manager.get_proxy_stats()

    message = (
        "📊 <b>System Status</b>\n\n"
        f"<b>Active Jobs:</b> {len(tasks)}\n"
        f"<b>Active Streams:</b> {len(streaming_sessions)}\n"
        f"<b>Proxies Available:</b> {proxy_stats.get('available', 0)}/{proxy_stats.get('total', 0)}\n\n"
        f"<b>Uptime:</b> Running"
    )
    await update.message.reply_text(message, parse_mode="HTML")
130
+
131
async def jobs_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Handle /jobs: list the five most recent jobs with a status emoji.

    Bug fix: sends with parse_mode="HTML" so the <b>/<code> markup renders
    instead of appearing literally in the chat.
    """
    # Imported lazily to avoid a circular import with the backend package.
    from backend.database import db

    jobs = await db.get_all_jobs(limit=5)

    if not jobs:
        await update.message.reply_text("📋 No jobs found.")
        return

    message = "📋 <b>Recent Jobs</b>\n\n"
    for job in jobs[:5]:
        # Fall back to an hourglass for unknown/queued states.
        status_emoji = {"completed": "✅", "failed": "❌", "running": "🔄"}.get(job.get("status"), "⏳")
        message += (
            f"{status_emoji} <code>{job.get('id', 'unknown')[:8]}</code>\n"
            f" {job.get('prompt', 'No prompt')[:50]}...\n"
            f" Format: {job.get('format', 'unknown')} | Status: {job.get('status', 'unknown')}\n\n"
        )

    await update.message.reply_text(message, parse_mode="HTML")
151
+
152
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Treat any plain text message as a new job prompt.

    Bug fix: the confirmation reply contains HTML tags but was sent without
    parse_mode, so users saw the literal markup; parse_mode="HTML" fixes it.
    """
    # Imported lazily to avoid a circular import with backend.main.
    from backend.main import create_job, JobRequest

    prompt = update.message.text

    if not prompt:
        return

    # Defaults for chat-initiated jobs: JSON output, headless, no streaming.
    req = JobRequest(prompt=prompt, format="json", headless=True, enable_streaming=False)

    try:
        result = await create_job(req)
        job_id = result["job_id"]

        await update.message.reply_text(
            f"✅ <b>Job Created!</b>\n\n"
            f"<b>ID:</b> <code>{job_id}</code>\n"
            f"<b>Task:</b> {prompt[:100]}...\n\n"
            "I'll notify you when it's done!",
            parse_mode="HTML",
        )

        # Mirror the notification through the notifier channel as well.
        notifier = TelegramNotifier()
        await notifier.initialize()
        await notifier.notify_job_started(job_id, prompt, "json")

    except Exception as e:
        await update.message.reply_text(f"❌ Failed to create job: {str(e)}")
183
+
184
# Global bot instance
# Module-level singleton shared with backend.main for job lifecycle
# notifications. start_bot() awaits its initialize(); until then every
# send is a no-op.
bot_notifier = TelegramNotifier()
186
+
187
async def start_bot():
    """Initialize the notifier and run the Telegram bot in polling mode.

    Bug fix: python-telegram-bot's Application has no start_polling method;
    the original `await application.start_polling(...)` raised
    AttributeError at startup. The correct async lifecycle (PTB v20+) is
    initialize() -> start() -> updater.start_polling().
    """
    await bot_notifier.initialize()

    if not bot_notifier._initialized:
        print("⚠️ Telegram bot not started (missing credentials)")
        return

    # Build the PTB application around the already-validated token.
    application = Application.builder().token(bot_notifier.token).build()

    # Register command and free-text handlers.
    application.add_handler(CommandHandler("start", start_command))
    application.add_handler(CommandHandler("ping", ping_command))
    application.add_handler(CommandHandler("status", status_command))
    application.add_handler(CommandHandler("jobs", jobs_command))
    application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    # Start polling without blocking the caller's event loop.
    print("🤖 Starting Telegram bot polling...")
    await application.initialize()
    await application.start()
    await application.updater.start_polling(allowed_updates=Update.ALL_TYPES)
backend/universal_extractor.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import asyncio
3
+ import functools
4
+ from typing import Dict, Any, List, Optional
5
+ import google.generativeai as genai
6
+ from backend.browser_controller import BrowserController
7
+ import base64
8
+ from bs4 import BeautifulSoup
9
+ import pandas as pd
10
+ from reportlab.lib.pagesizes import letter
11
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
12
+ from reportlab.lib.styles import getSampleStyleSheet
13
+ from pathlib import Path
14
+ import re
15
+
16
+ MODEL = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
17
+
18
+ UNIVERSAL_EXTRACTION_PROMPT = """
19
+ You are a universal data extraction specialist. Your task is to analyze any webpage and extract the most relevant information based on the user's specific goal.
20
+
21
+ USER'S GOAL: {goal}
22
+ CURRENT URL: {url}
23
+ PAGE TITLE: {title}
24
+ WEBSITE TYPE: {website_type}
25
+
26
+ EXTRACTION GUIDELINES:
27
+
28
+ **For PERSON/PROFILE information:**
29
+ - Full name and professional title
30
+ - Current position and company
31
+ - Professional background and experience
32
+ - Education and credentials
33
+ - Skills and expertise areas
34
+ - Contact information (if publicly available)
35
+ - Notable achievements or projects
36
+ - Social media profiles and professional links
37
+
38
+ **For COMPANY/ORGANIZATION information:**
39
+ - Company name and industry
40
+ - Mission, vision, and description
41
+ - Products or services offered
42
+ - Leadership team and key personnel
43
+ - Company size and locations
44
+ - Contact information and headquarters
45
+ - Recent news, funding, or updates
46
+ - Key statistics or metrics
47
+
48
+ **For PRODUCT/SERVICE information:**
49
+ - Product/service name and category
50
+ - Key features and specifications
51
+ - Pricing information
52
+ - User reviews and ratings
53
+ - Availability and purchasing options
54
+ - Technical requirements
55
+ - Comparison with alternatives
56
+
57
+ **For NEWS/CONTENT information:**
58
+ - Article headline and summary
59
+ - Publication date and source
60
+ - Key facts and main points
61
+ - Author information
62
+ - Related topics or tags
63
+ - Important quotes or statistics
64
+
65
+ **For DATA/RESEARCH information:**
66
+ - Main findings or conclusions
67
+ - Statistical data and metrics
68
+ - Methodology or sources
69
+ - Publication details
70
+ - Key insights and implications
71
+
72
+ **For GENERAL INFORMATION:**
73
+ - Extract the main facts relevant to the user's goal
74
+ - Include supporting details and context
75
+ - Provide sources and references when available
76
+ - Focus on actionable or useful information
77
+
78
+ IMPORTANT:
79
+ - Only extract information that is VISIBLE and RELEVANT to the user's goal
80
+ - Organize information in a clear, structured format
81
+ - Include metadata about the source and extraction context
82
+ - Be comprehensive but avoid irrelevant details
83
+ - If the page doesn't contain the requested information, clearly state what was found instead
84
+
85
+ WEBPAGE CONTENT:
86
+ {content}
87
+
88
+ Return a well-structured JSON object with the extracted information:
89
+ """
90
+
91
class UniversalExtractor:
    """Goal-driven extractor that turns any webpage into structured output."""

    def __init__(self):
        # Reserved for caching extraction results; not read or written by
        # any method visible in this file — possibly future use.
        self.extraction_cache = {}
94
+
95
async def extract_intelligent_content(self, browser: BrowserController, goal: str, fmt: str = "json", job_id: str = None) -> str:
    """Run the full extraction pipeline for the page loaded in *browser*.

    Steps: read URL/title, classify the site, build a text digest of the
    DOM, ask the LLM to pull out goal-relevant data, then render it as
    *fmt* (json/txt/md/html/csv/pdf). Falls back to raw-text extraction
    on any error.

    Args:
        browser: live BrowserController whose page is already navigated.
        goal: the user's extraction goal, passed verbatim to the LLM prompt.
        fmt: output format key understood by _format_output.
        job_id: optional id used to name the PDF output file.
    """
    try:
        # Page identity feeds both site-type detection and result metadata.
        url = browser.page.url
        title = await browser.page.title()

        # Heuristic classification tunes the extraction prompt per site type.
        website_type = self._detect_website_type(url, title)

        # Compact labelled digest of the DOM (headings/paragraphs/lists/tables).
        content = await self._get_structured_content(browser)

        # LLM pass: keep only information relevant to the user's goal.
        extracted_data = await self._ai_extract(goal, url, title, website_type, content)

        # job_id is forwarded so PDF output can be named after the job.
        return await self._format_output(extracted_data, fmt, goal, job_id)

    except Exception as e:
        print(f"❌ Universal extraction failed: {e}")
        return await self._fallback_extraction(browser, fmt, goal)
117
+
118
+ def _detect_website_type(self, url: str, title: str) -> str:
119
+ """Detect website type for better extraction strategy"""
120
+ url_lower = url.lower()
121
+ title_lower = title.lower()
122
+
123
+ # Professional networks
124
+ if "linkedin.com" in url_lower:
125
+ return "linkedin_profile"
126
+ if "github.com" in url_lower:
127
+ return "github_profile"
128
+
129
+ # Social media
130
+ if any(domain in url_lower for domain in ["twitter.com", "facebook.com", "instagram.com"]):
131
+ return "social_media"
132
+
133
+ # E-commerce
134
+ if any(domain in url_lower for domain in ["amazon", "ebay", "shopify", "etsy"]):
135
+ return "ecommerce"
136
+
137
+ # News and content
138
+ if any(word in title_lower for word in ["news", "article", "blog", "post"]):
139
+ return "news_content"
140
+
141
+ # Company websites
142
+ if any(word in title_lower for word in ["company", "corp", "about", "careers"]):
143
+ return "company_website"
144
+
145
+ # Search results
146
+ if "/search" in url_lower or "google.com" in url_lower:
147
+ return "search_results"
148
+
149
+ return "general_website"
150
+
151
async def _get_structured_content(self, browser: BrowserController) -> str:
    """Return a compact, labelled text digest of the page's main content.

    Strips non-content tags, then walks the first few main/article/section
    containers collecting headings, paragraphs, lists and tables as
    prefixed lines (HEADING:/TEXT:/LIST:/TABLE:). Downstream parsing
    (_create_simple_summary) relies on these prefixes. Output is truncated
    to stay within LLM token limits.

    Bug fix: the fallback previously did
    `await browser.page.inner_text("body")[:8000]`, which slices the
    coroutine object before awaiting it and raises TypeError, masking the
    original failure. The coroutine is now awaited before slicing.
    """
    try:
        html = await browser.page.content()
        soup = BeautifulSoup(html, 'html.parser')

        # Drop boilerplate / non-content elements before harvesting text.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'advertisement']):
            tag.decompose()

        main_content = []

        # Prefer semantic containers; fall back to the whole body.
        main_containers = soup.find_all(['main', 'article', 'section']) or [soup.find('body')]

        for container in main_containers[:3]:  # cap containers to bound output size
            if not container:
                continue

            # Headings.
            for heading in container.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
                if heading.get_text(strip=True):
                    main_content.append(f"HEADING: {heading.get_text(strip=True)}")

            # Paragraphs (skip trivially short fragments).
            for p in container.find_all('p')[:20]:
                text = p.get_text(strip=True)
                if len(text) > 20:
                    main_content.append(f"TEXT: {text}")

            # Bullet / numbered lists.
            for list_elem in container.find_all(['ul', 'ol'])[:5]:
                items = list_elem.find_all('li')
                if items:
                    main_content.append("LIST:")
                    for item in items[:10]:
                        text = item.get_text(strip=True)
                        if text:
                            main_content.append(f"  - {text}")

            # Tables, rendered as pipe-separated rows.
            for table in container.find_all('table')[:3]:
                rows = table.find_all('tr')
                if rows:
                    main_content.append("TABLE:")
                    for row in rows[:10]:
                        cells = row.find_all(['td', 'th'])
                        if cells:
                            row_text = " | ".join(cell.get_text(strip=True) for cell in cells)
                            if row_text.strip():
                                main_content.append(f"  {row_text}")

        # Overall token-budget cap.
        return "\n".join(main_content)[:12000]

    except Exception as e:
        print(f"❌ Error getting structured content: {e}")
        # Fallback: plain body text (await BEFORE slicing — see docstring).
        try:
            body_text = await browser.page.inner_text("body")
            return body_text[:8000]
        except Exception:
            return "Content extraction failed"
218
+
219
async def _ai_extract(self, goal: str, url: str, title: str, website_type: str, content: str) -> Dict[str, Any]:
    """Ask the Gemini model to extract goal-relevant data from the digest.

    Returns a dict parsed from the model's JSON output (with an added
    "_metadata" entry), a wrapped raw-text dict when no JSON is found in
    the reply, or a hand-built fallback structure if the call itself fails.
    """
    try:
        prompt = UNIVERSAL_EXTRACTION_PROMPT.format(
            goal=goal,
            url=url,
            title=title,
            website_type=website_type,
            content=content
        )

        # generate_content is blocking; run it in a worker thread so the
        # event loop stays responsive.
        response = await asyncio.to_thread(
            functools.partial(MODEL.generate_content, prompt)
        )

        raw_text = response.text

        # Locate the outermost {...} span — the model may wrap JSON in prose.
        start = raw_text.find('{')
        end = raw_text.rfind('}') + 1

        if start != -1 and end > start:
            json_str = raw_text[start:end]
            extracted_data = json.loads(json_str)

            # Provenance metadata travels with the result into every format.
            extracted_data["_metadata"] = {
                "source_url": url,
                "page_title": title,
                "website_type": website_type,
                "extraction_goal": goal,
                "extraction_timestamp": asyncio.get_event_loop().time(),
                "extraction_method": "ai_powered"
            }

            return extracted_data
        else:
            # No JSON found in the reply: keep the raw text so nothing is lost.
            return {
                "extracted_content": raw_text,
                "content_type": "unstructured_text",
                "_metadata": {
                    "source_url": url,
                    "page_title": title,
                    "website_type": website_type,
                    "extraction_goal": goal,
                    "extraction_timestamp": asyncio.get_event_loop().time(),
                    "extraction_method": "text_fallback"
                }
            }

    except Exception as e:
        print(f"❌ AI extraction failed: {e}")
        return self._create_fallback_structure(content, url, title, website_type, goal)
274
+
275
+ def _create_fallback_structure(self, content: str, url: str, title: str, website_type: str, goal: str) -> Dict[str, Any]:
276
+ """Create structured fallback when AI extraction fails"""
277
+ return {
278
+ "extraction_status": "fallback_mode",
279
+ "raw_content": content[:2000], # Truncated content
280
+ "content_summary": self._create_simple_summary(content),
281
+ "_metadata": {
282
+ "source_url": url,
283
+ "page_title": title,
284
+ "website_type": website_type,
285
+ "extraction_goal": goal,
286
+ "extraction_method": "fallback_structure",
287
+ "note": "AI extraction failed, using fallback method"
288
+ }
289
+ }
290
+
291
+ def _create_simple_summary(self, content: str) -> Dict[str, Any]:
292
+ """Create a simple summary of content without AI"""
293
+ lines = content.split('\n')
294
+
295
+ summary = {
296
+ "headings": [],
297
+ "key_text": [],
298
+ "lists": [],
299
+ "total_lines": len(lines)
300
+ }
301
+
302
+ current_list = []
303
+
304
+ for line in lines[:50]: # Limit processing
305
+ line = line.strip()
306
+ if not line:
307
+ continue
308
+
309
+ if line.startswith("HEADING:"):
310
+ summary["headings"].append(line[8:].strip())
311
+ elif line.startswith("TEXT:"):
312
+ text = line[5:].strip()
313
+ if len(text) > 30: # Only substantial text
314
+ summary["key_text"].append(text[:200])
315
+ elif line.startswith("LIST:"):
316
+ if current_list:
317
+ summary["lists"].append(current_list)
318
+ current_list = []
319
+ elif line.startswith(" -"):
320
+ current_list.append(line[4:].strip())
321
+
322
+ if current_list:
323
+ summary["lists"].append(current_list)
324
+
325
+ return summary
326
+
327
+ async def _format_output(self, data: Dict[str, Any], fmt: str, goal: str, job_id: str = None) -> str:
328
+ """Format extracted data in the requested format"""
329
+ if fmt == "json":
330
+ return json.dumps(data, indent=2, ensure_ascii=False)
331
+ elif fmt == "txt":
332
+ return self._format_as_text(data)
333
+ elif fmt == "md":
334
+ return self._format_as_markdown(data)
335
+ elif fmt == "html":
336
+ return self._format_as_html(data)
337
+ elif fmt == "csv":
338
+ return self._format_as_csv(data)
339
+ elif fmt == "pdf":
340
+ return await self._format_as_pdf(data, goal, job_id) # Pass job_id
341
+ else:
342
+ return json.dumps(data, indent=2, ensure_ascii=False)
343
+
344
+
345
+ def _format_as_text(self, data: Dict[str, Any]) -> str:
346
+ """Format as clean text"""
347
+ lines = []
348
+ metadata = data.get("_metadata", {})
349
+
350
+ if metadata:
351
+ lines.append(f"EXTRACTED INFORMATION")
352
+ lines.append(f"Source: {metadata.get('source_url', 'Unknown')}")
353
+ lines.append(f"Goal: {metadata.get('extraction_goal', 'Unknown')}")
354
+ lines.append(f"Website Type: {metadata.get('website_type', 'Unknown')}")
355
+ lines.append("-" * 60)
356
+ lines.append("")
357
+
358
+ def format_item(key: str, value, indent: int = 0):
359
+ spaces = " " * indent
360
+ if isinstance(value, dict):
361
+ if key != "_metadata":
362
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}:")
363
+ for k, v in value.items():
364
+ format_item(k, v, indent + 1)
365
+ elif isinstance(value, list):
366
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}:")
367
+ for item in value:
368
+ if isinstance(item, str):
369
+ lines.append(f"{spaces} • {item}")
370
+ else:
371
+ lines.append(f"{spaces} • {str(item)}")
372
+ else:
373
+ lines.append(f"{spaces}{key.replace('_', ' ').title()}: {value}")
374
+
375
+ for key, value in data.items():
376
+ format_item(key, value)
377
+
378
+ return "\n".join(lines)
379
+
380
+ def _format_as_markdown(self, data: Dict[str, Any]) -> str:
381
+ """Format as Markdown"""
382
+ lines = []
383
+ metadata = data.get("_metadata", {})
384
+
385
+ if metadata:
386
+ lines.append("# Extracted Information")
387
+ lines.append("")
388
+ lines.append(f"**Source:** {metadata.get('source_url', 'Unknown')}")
389
+ lines.append(f"**Goal:** {metadata.get('extraction_goal', 'Unknown')}")
390
+ lines.append(f"**Website Type:** {metadata.get('website_type', 'Unknown')}")
391
+ lines.append("")
392
+ lines.append("---")
393
+ lines.append("")
394
+
395
+ def format_item(key: str, value, level: int = 2):
396
+ if isinstance(value, dict):
397
+ if key != "_metadata":
398
+ lines.append(f"{'#' * level} {key.replace('_', ' ').title()}")
399
+ lines.append("")
400
+ for k, v in value.items():
401
+ format_item(k, v, level + 1)
402
+ elif isinstance(value, list):
403
+ lines.append(f"{'#' * level} {key.replace('_', ' ').title()}")
404
+ lines.append("")
405
+ for item in value:
406
+ lines.append(f"- {item}")
407
+ lines.append("")
408
+ else:
409
+ lines.append(f"**{key.replace('_', ' ').title()}:** {value}")
410
+ lines.append("")
411
+
412
+ for key, value in data.items():
413
+ format_item(key, value)
414
+
415
+ return "\n".join(lines)
416
+
417
+ def _format_as_html(self, data: Dict[str, Any]) -> str:
418
+ """Format as HTML"""
419
+ html_parts = ["<!DOCTYPE html><html><head><title>Extracted Information</title>"]
420
+ html_parts.append("<style>body{font-family:Arial,sans-serif;margin:40px;} h1,h2,h3{color:#333;} .metadata{background:#f5f5f5;padding:15px;border-radius:5px;margin-bottom:20px;}</style>")
421
+ html_parts.append("</head><body>")
422
+
423
+ metadata = data.get("_metadata", {})
424
+ if metadata:
425
+ html_parts.append("<h1>Extracted Information</h1>")
426
+ html_parts.append("<div class='metadata'>")
427
+ html_parts.append(f"<p><strong>Source:</strong> <a href='{metadata.get('source_url', '#')}'>{metadata.get('source_url', 'Unknown')}</a></p>")
428
+ html_parts.append(f"<p><strong>Goal:</strong> {metadata.get('extraction_goal', 'Unknown')}</p>")
429
+ html_parts.append(f"<p><strong>Website Type:</strong> {metadata.get('website_type', 'Unknown')}</p>")
430
+ html_parts.append("</div>")
431
+
432
+ def format_item(key: str, value, level: int = 2):
433
+ if isinstance(value, dict):
434
+ if key != "_metadata":
435
+ html_parts.append(f"<h{level}>{key.replace('_', ' ').title()}</h{level}>")
436
+ for k, v in value.items():
437
+ format_item(k, v, min(level + 1, 6))
438
+ elif isinstance(value, list):
439
+ html_parts.append(f"<h{level}>{key.replace('_', ' ').title()}</h{level}>")
440
+ html_parts.append("<ul>")
441
+ for item in value:
442
+ html_parts.append(f"<li>{item}</li>")
443
+ html_parts.append("</ul>")
444
+ else:
445
+ html_parts.append(f"<p><strong>{key.replace('_', ' ').title()}:</strong> {value}</p>")
446
+
447
+ for key, value in data.items():
448
+ format_item(key, value)
449
+
450
+ html_parts.append("</body></html>")
451
+ return "\n".join(html_parts)
452
+
453
+ def _format_as_csv(self, data: Dict[str, Any]) -> str:
454
+ """Format as CSV"""
455
+ try:
456
+ # Flatten the nested structure
457
+ flattened = self._flatten_dict(data)
458
+
459
+ # Create DataFrame
460
+ df = pd.DataFrame([flattened])
461
+
462
+ return df.to_csv(index=False)
463
+
464
+ except Exception as e:
465
+ print(f"❌ CSV formatting failed: {e}")
466
+ # Simple fallback
467
+ csv_lines = ["Field,Value"]
468
+ for key, value in data.items():
469
+ if key != "_metadata":
470
+ clean_value = str(value).replace('"', '""').replace('\n', ' ')
471
+ csv_lines.append(f'"{key}","{clean_value}"')
472
+ return "\n".join(csv_lines)
473
+
474
async def _format_as_pdf(self, data: Dict[str, Any], goal: str, job_id: str = None) -> str:
    """Write the extraction result to outputs/<job_id>.pdf via ReportLab.

    Returns the sentinel string "PDF_DIRECT_SAVE:<path>" so the caller
    knows the payload is already on disk rather than inline content.

    Raises:
        RuntimeError: if the PDF could not be built.
        ImportError: if ReportLab is missing and auto-install fails.
    """
    try:
        # Local imports keep ReportLab optional until a PDF is requested.
        from reportlab.lib.pagesizes import letter
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from reportlab.lib.styles import getSampleStyleSheet
        import html

        output_dir = Path("outputs")
        output_dir.mkdir(exist_ok=True)

        # Prefer the job id for a stable filename; otherwise timestamp it.
        if job_id:
            filename = f"{job_id}.pdf"
        else:
            import time
            timestamp = int(time.time())
            filename = f"extracted_data_{timestamp}.pdf"

        filepath = output_dir / filename

        doc = SimpleDocTemplate(str(filepath), pagesize=letter, topMargin=72, bottomMargin=72)
        styles = getSampleStyleSheet()
        story = []

        # Title
        story.append(Paragraph("Extracted Information", styles['Title']))
        story.append(Spacer(1, 20))

        # Provenance block; values are HTML-escaped because Paragraph
        # interprets its text as markup.
        metadata = data.get("_metadata", {})
        if metadata:
            story.append(Paragraph(f"<b>Source:</b> {html.escape(str(metadata.get('source_url', 'Unknown')))}", styles['Normal']))
            story.append(Paragraph(f"<b>Goal:</b> {html.escape(str(metadata.get('extraction_goal', 'Unknown')))}", styles['Normal']))
            story.append(Paragraph(f"<b>Website Type:</b> {html.escape(str(metadata.get('website_type', 'Unknown')))}", styles['Normal']))
            story.append(Spacer(1, 20))

        def add_content(key: str, value, level: int = 0):
            # Recursively flows nested dicts/lists/scalars into the story,
            # truncating long strings so Paragraph layout stays manageable.
            if isinstance(value, dict):
                # _metadata is already rendered in the provenance block above.
                if key != "_metadata":
                    style = styles['Heading1'] if level == 0 else styles['Heading2']
                    clean_key = html.escape(key.replace('_', ' ').title())
                    story.append(Paragraph(clean_key, style))
                    story.append(Spacer(1, 10))
                    for k, v in value.items():
                        add_content(k, v, level + 1)
            elif isinstance(value, list):
                clean_key = html.escape(key.replace('_', ' ').title())
                story.append(Paragraph(f"<b>{clean_key}:</b>", styles['Normal']))
                story.append(Spacer(1, 6))
                for item in value:
                    # Truncate long items and escape markup characters.
                    item_str = html.escape(str(item))
                    if len(item_str) > 300:
                        item_str = item_str[:300] + "..."
                    story.append(Paragraph(f"• {item_str}", styles['Normal']))
                story.append(Spacer(1, 10))
            else:
                clean_key = html.escape(key.replace('_', ' ').title())
                value_str = html.escape(str(value))
                if len(value_str) > 800:
                    value_str = value_str[:800] + "..."
                story.append(Paragraph(f"<b>{clean_key}:</b> {value_str}", styles['Normal']))
                story.append(Spacer(1, 8))

        for key, value in data.items():
            add_content(key, value)

        # Build PDF with error handling.
        try:
            doc.build(story)
            print(f"✅ PDF successfully generated: {filepath}")
            # Sentinel: caller serves/copies the already-written file.
            return f"PDF_DIRECT_SAVE:{filepath}"
        except Exception as build_error:
            print(f"❌ PDF build error: {build_error}")
            raise build_error

    except ImportError:
        # NOTE(review): runtime pip install followed by a recursive retry —
        # if the import keeps failing after a successful install this could
        # recurse indefinitely; consider a single-retry guard.
        print("❌ ReportLab not installed. Installing...")
        import subprocess
        import sys
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", "reportlab"])
            # Try again after installation.
            return await self._format_as_pdf(data, goal, job_id)
        except subprocess.CalledProcessError:
            print("❌ Failed to install ReportLab")
            raise ImportError("ReportLab installation failed")

    except Exception as e:
        print(f"❌ PDF generation failed: {e}")
        # Propagate instead of writing a fallback file so callers see the error.
        raise RuntimeError(f"PDF generation failed: {str(e)}")
569
+
570
+
571
+ def _flatten_dict(self, d: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]:
572
+ """Flatten nested dictionary for CSV export"""
573
+ items = []
574
+ for k, v in d.items():
575
+ new_key = f"{parent_key}{sep}{k}" if parent_key else k
576
+ if isinstance(v, dict):
577
+ items.extend(self._flatten_dict(v, new_key, sep=sep).items())
578
+ elif isinstance(v, list):
579
+ items.append((new_key, '; '.join(map(str, v))))
580
+ else:
581
+ items.append((new_key, v))
582
+ return dict(items)
583
+
584
async def _fallback_extraction(self, browser: BrowserController, fmt: str, goal: str) -> str:
    """Last-resort extraction: dump raw page text when AI processing fails."""
    try:
        body_text = await browser.page.inner_text("body")
        page_url = browser.page.url
        page_title = await browser.page.title()

        payload = {
            "content": body_text[:3000],  # keep the JSON payload bounded
            "source": page_url,
            "title": page_title,
            "extraction_method": "fallback",
            "note": "AI extraction failed, using basic text extraction"
        }

        if fmt == "json":
            return json.dumps(payload, indent=2)
        if fmt == "txt":
            return f"Title: {page_title}\nSource: {page_url}\n\nContent:\n{body_text}"
        return body_text
    except Exception as e:
        return f"Extraction completely failed: {str(e)}"
backend/vision_model.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import google.generativeai as genai
4
+ from dotenv import load_dotenv
5
+ import json
6
+ import asyncio
7
+ import functools
8
+ from PIL import Image
9
+ import io
10
+
11
+ load_dotenv()
12
+
13
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
14
+ MODEL = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
15
+
16
+ # Universal system prompt - works for ANY website
17
+ SYSTEM_PROMPT = """
18
+ You are a universal web automation agent that can navigate and interact with ANY website to accomplish user goals.
19
+
20
+ You will receive:
21
+ 1. A screenshot of the current webpage
22
+ 2. Interactive elements with indices
23
+ 3. The user's specific goal/task
24
+ 4. Current URL and page context
25
+
26
+ Your job is to analyze the current page and determine the BEST next action to accomplish the user's goal, regardless of what type of website this is.
27
+
28
+ AVAILABLE ACTIONS:
29
+
30
+ CLICK - Click on any interactive element:
31
+ {"action": "click", "index": N, "reason": "specific reason for clicking this element"}
32
+
33
+ TYPE - Input text into any input field:
34
+ {"action": "type", "index": N, "text": "text to enter", "reason": "reason for entering this text"}
35
+
36
+ SCROLL - Navigate the page vertically:
37
+ {"action": "scroll", "direction": "down|up", "amount": 300-800, "reason": "reason for scrolling"}
38
+
39
+ PRESS_KEY - Press any keyboard key:
40
+ {"action": "press_key", "key": "Enter|Tab|Escape|Space|etc", "reason": "reason for key press"}
41
+
42
+ NAVIGATE - Go to a specific URL (only if needed):
43
+ {"action": "navigate", "url": "https://example.com", "reason": "reason for navigation"}
44
+
45
+ EXTRACT - Save current page content (when goal is achieved):
46
+ {"action": "extract", "reason": "goal accomplished, extracting relevant information"}
47
+
48
+ DONE - Mark task as complete:
49
+ {"action": "done", "reason": "task successfully completed"}
50
+
51
+ DECISION RULES:
52
+ 1. **Analyze the user's goal** - understand what information/action they want
53
+ 2. **Assess current page** - what type of page is this? What can be done here?
54
+ 3. **Choose best action** - what single action moves closest to the goal?
55
+ 4. **Be adaptive** - different sites have different patterns, adapt accordingly
56
+
57
+ WEBSITE TYPE DETECTION:
58
+ - **Search engines**: Look for search boxes, enter queries, click results
59
+ - **E-commerce**: Find products, navigate categories, view details
60
+ - **Social media**: Look for profiles, posts, navigation menus
61
+ - **Forms/Applications**: Fill required fields, submit forms
62
+ - **Content sites**: Navigate articles, extract information
63
+ - **Databases/APIs**: Use search/filter features, extract data
64
+
65
+ INTERACTION STRATEGY:
66
+ - **First time on page**: Look for main navigation, search, or primary actions
67
+ - **Search results**: Click on most relevant results for user's goal
68
+ - **Product pages**: Look for details, specifications, reviews as needed
69
+ - **Profile/About pages**: Extract relevant information about person/entity
70
+ - **Forms**: Fill systematically, validate inputs
71
+ - **Lists/Tables**: Use pagination, sorting, filtering as needed
72
+
73
+ EXTRACTION TIMING:
74
+ - Extract when you have found the specific information the user requested
75
+ - Don't extract from search results - click through to detailed pages first
76
+ - For research tasks: navigate to authoritative sources before extracting
77
+ - For data collection: ensure you're on pages with comprehensive information
78
+
79
+ REMEMBER: Be universal - work with ANY website structure, ANY content type, ANY user goal.
80
+ """
81
+
82
async def decide(img_bytes: bytes, page_state, goal: str) -> dict:
    """Ask the vision model for the single best next browser action.

    Args:
        img_bytes: Raw screenshot bytes of the current page.
        page_state: Page snapshot exposing ``url``, ``title`` and
            ``selector_map`` (index -> element with tag_name/text/attributes
            and clickable/input flags). NOTE(review): concrete type is
            declared elsewhere in the project — confirm attribute set.
        goal: The user's natural-language task description.

    Returns:
        An action dict following SYSTEM_PROMPT's schema (click/type/scroll/
        press_key/navigate/extract/done) with a ``token_usage`` entry added.
        On any failure, returns a ``done`` action carrying ``error`` and
        zeroed token counts — callers never see an exception from here.
    """
    print(f"🤖 Universal AI decision")
    print(f"📊 Image size: {len(img_bytes)} bytes")
    print(f"🎯 Goal: {goal}")
    print(f"🖱️ Interactive elements: {len(page_state.selector_map)}")
    print(f"📍 Current URL: {page_state.url}")

    try:
        # Compress image efficiently: downscale then re-encode as JPEG so the
        # multimodal prompt stays small (and cheap in tokens).
        image = Image.open(io.BytesIO(img_bytes))
        max_size = (1280, 800)
        image.thumbnail(max_size, Image.Resampling.LANCZOS)

        compressed_buffer = io.BytesIO()
        image.save(compressed_buffer, format='JPEG', quality=75, optimize=True)
        compressed_image = Image.open(compressed_buffer)

        # Create comprehensive element information (dynamic based on content)
        interactive_elements = []
        max_elements = min(20, len(page_state.selector_map))  # Adaptive limit

        # Lowest indices first; only the first max_elements are described.
        for index in sorted(page_state.selector_map.keys())[:max_elements]:
            elem = page_state.selector_map[index]

            # Dynamic element description based on context.
            # elem.text may be None — guarded before slicing.
            element_data = {
                "index": index,
                "tag": elem.tag_name,
                "text": elem.text[:60] if elem.text else "",
                "clickable": elem.is_clickable,
                "input": elem.is_input,
            }

            # Add contextual attributes dynamically (all truncated to keep
            # the JSON blob compact).
            if elem.attributes.get("href"):
                element_data["link"] = elem.attributes["href"][:100]
            if elem.attributes.get("placeholder"):
                element_data["placeholder"] = elem.attributes["placeholder"][:30]
            if elem.attributes.get("type"):
                element_data["type"] = elem.attributes["type"]
            if elem.attributes.get("class"):
                # Extract meaningful class hints (only classes that suggest a
                # role like search/login/nav are forwarded to the model).
                classes = elem.attributes["class"].lower()
                if any(hint in classes for hint in ["search", "login", "submit", "button", "nav", "menu"]):
                    element_data["class_hint"] = classes[:50]
            if elem.attributes.get("id"):
                element_data["id"] = elem.attributes["id"][:30]

            interactive_elements.append(element_data)

        # Detect website type dynamically (heuristic, see detect_website_type)
        website_type = detect_website_type(page_state.url, page_state.title, interactive_elements)

        # Create dynamic context-aware prompt
        prompt = f"""
        USER GOAL: {goal}

        CURRENT CONTEXT:
        - URL: {page_state.url}
        - Page Title: {page_state.title}
        - Website Type: {website_type}
        - Available Elements: {len(interactive_elements)}

        INTERACTIVE ELEMENTS:
        {json.dumps(interactive_elements, indent=1)}

        Based on the user's goal and current page context, what is the BEST next action?
        Consider the website type and adapt your strategy accordingly.
        """

        content = [SYSTEM_PROMPT, prompt, compressed_image]

        # Count tokens and send request. Both SDK calls are blocking, so they
        # are pushed to a worker thread to keep the event loop responsive.
        token_count_response = await asyncio.to_thread(
            functools.partial(MODEL.count_tokens, content)
        )
        input_tokens = token_count_response.total_tokens

        response = await asyncio.to_thread(
            functools.partial(MODEL.generate_content, content)
        )

        raw_text = response.text
        response_tokens = await count_response_tokens(raw_text)
        total_tokens = input_tokens + response_tokens

        # Parse response with validation (falls back to heuristics on bad JSON)
        result = parse_ai_response(raw_text, page_state, goal, website_type)

        # Add token usage so the caller can track spend per decision.
        result['token_usage'] = {
            'prompt_tokens': input_tokens,
            'response_tokens': response_tokens,
            'total_tokens': total_tokens
        }

        print(f"🎯 Universal Result: {result}")
        return result

    except Exception as e:
        print(f"❌ Error: {e}")
        return {
            "action": "done",
            "error": str(e),
            "token_usage": {"prompt_tokens": 0, "response_tokens": 0, "total_tokens": 0}
        }
189
+
190
def detect_website_type(url: str, title: str, elements: list) -> str:
    """Heuristically classify the current page by URL, title and elements.

    Args:
        url: Current page URL.
        title: Current page title.
        elements: List of element-description dicts as built by ``decide``
            (each may carry "text" and "input" keys).

    Returns:
        One of: "search_engine", "search_results", "ecommerce",
        "social_profile", "form_application", "content_site",
        "company_site", "database_site", "general_website".
        Checks run in priority order; the first match wins.
    """
    url_lower = url.lower()
    title_lower = title.lower()

    # Search engines
    if any(domain in url_lower for domain in ["google.com", "bing.com", "duckduckgo.com", "yahoo.com"]):
        if "/search" in url_lower or any("search" in elem.get("text", "").lower() for elem in elements):
            return "search_results"
        return "search_engine"

    # E-commerce
    if any(domain in url_lower for domain in ["amazon", "ebay", "shopify", "etsy", "alibaba"]):
        return "ecommerce"
    if any(word in title_lower for word in ["shop", "store", "buy", "cart", "product"]):
        return "ecommerce"

    # Social media
    if any(domain in url_lower for domain in ["linkedin", "twitter", "facebook", "instagram", "github"]):
        return "social_profile"

    # Forms/Applications: more than 3 input elements suggests a form page.
    # (Rewritten from a per-element generator whose filter recomputed the
    # same input count for every element — O(n^2) and hard to read.)
    if sum(1 for elem in elements if elem.get("input")) > 3:
        return "form_application"

    # Content/News sites
    if any(word in title_lower for word in ["news", "article", "blog", "post"]):
        return "content_site"

    # Company/Organization
    if any(word in title_lower for word in ["company", "corp", "inc", "ltd", "about", "contact"]):
        return "company_site"

    # Database/Directory
    if any(word in url_lower for word in ["directory", "database", "catalog", "listing"]):
        return "database_site"

    return "general_website"
228
+
229
def parse_ai_response(raw_text: str, page_state, goal: str, website_type: str) -> dict:
    """Parse the model's raw reply into a validated action dict.

    Falls back to a heuristic action (``get_fallback_action``) whenever the
    reply contains no JSON object, the JSON is malformed, the action name is
    unknown, or a referenced element index is not in the selector map.
    """
    VALID_ACTIONS = ("click", "type", "scroll", "press_key", "navigate", "extract", "done")

    # Take the widest {...} span in the reply as the candidate JSON object.
    first = raw_text.find('{')
    last = raw_text.rfind('}') + 1
    if first == -1 or last <= first:
        return get_fallback_action(page_state, goal, website_type)

    try:
        parsed = json.loads(raw_text[first:last])
    except json.JSONDecodeError as err:
        print(f"❌ JSON error: {err}")
        return get_fallback_action(page_state, goal, website_type)

    # Reject unknown action names.
    if parsed.get("action") not in VALID_ACTIONS:
        return get_fallback_action(page_state, goal, website_type)

    # Reject element indices the page doesn't actually have.
    if "index" in parsed and parsed["index"] not in page_state.selector_map:
        print(f"❌ Invalid index {parsed['index']}")
        return get_fallback_action(page_state, goal, website_type)

    return parsed
257
+
258
def get_fallback_action(page_state, goal: str, website_type: str) -> dict:
    """Heuristic action chosen when the model's reply can't be used.

    Strategy, in order:
      1. For search-like goals, type the query into an obvious search box.
      2. Click the first element whose text overlaps the goal's first words.
      3. On a search-results page, click the first substantial result.
      4. Otherwise scroll down to reveal more of the page.

    Fix: ``elem.text`` can be None (bare inputs/links — the element builder
    in ``decide`` guards it the same way), so it is defaulted to "" before
    calling ``.lower()`` / ``len()`` instead of crashing.
    """
    goal_lower = goal.lower()

    # 1) Look for obvious search boxes
    for index, elem in page_state.selector_map.items():
        if elem.is_input:
            # elem.text may be None — guard before .lower().
            haystack = (elem.text or "").lower() + str(elem.attributes).lower()
            if any(word in haystack for word in ["search", "query", "find"]) and "search" in goal_lower:
                return {"action": "type", "index": index, "text": extract_search_query(goal),
                        "reason": "Found search box for user query"}

    # 2) Look for relevant links based on goal
    for index, elem in page_state.selector_map.items():
        if elem.is_clickable and elem.text:
            if any(word in elem.text.lower() for word in goal_lower.split()[:3]):
                return {"action": "click", "index": index,
                        "reason": f"Found relevant link: {elem.text[:30]}"}

    # 3) Default behaviors by website type
    if website_type == "search_results":
        # Click first meaningful result (None-safe length check).
        for index, elem in page_state.selector_map.items():
            if elem.is_clickable and len(elem.text or "") > 10:
                return {"action": "click", "index": index,
                        "reason": "Clicking search result for more details"}

    # 4) Generic fallback
    return {"action": "scroll", "direction": "down", "amount": 400,
            "reason": "Exploring page to find relevant content"}
288
+
289
def extract_search_query(goal: str) -> str:
    """Strip command-style filler words from *goal* and return a short query.

    At most the first six remaining words are kept so the query stays concise.
    """
    # Words that describe the command rather than the search subject.
    filler = frozenset(
        ("go", "to", "search", "for", "find", "get", "save", "extract", "info", "about")
    )
    kept = []
    for token in goal.split():
        if token.lower() not in filler:
            kept.append(token)
        if len(kept) == 6:  # Limit query length
            break
    return " ".join(kept)
296
+
297
async def count_response_tokens(response_text: str) -> int:
    """Count tokens in *response_text* using the model's tokenizer.

    The SDK call is blocking, so it runs in a worker thread. If it fails for
    any reason, fall back to a rough 4-characters-per-token estimate.
    """
    try:
        counted = await asyncio.to_thread(MODEL.count_tokens, response_text)
        return counted.total_tokens
    except Exception as err:  # API/network errors: degrade gracefully
        print(f"❌ Error counting response tokens: {err}")
        return len(response_text) // 4
307
+
308
+
309
# NOTE(review): kept for reference only — this probing approach does not work
# with the current response structure / generative model; decide() instead
# counts tokens explicitly via MODEL.count_tokens (see count_response_tokens).
# extract token usage
def extract_token_usage(response):
    """
    Extract token usage from various possible locations in the response.

    Tries, in order: the top-level ``usage_metadata`` attribute, a
    ``usage_metadata`` entry inside ``response.result``'s dict form, a
    ``token_count`` on the first candidate, and finally a ``token_count``
    on the first candidate of ``response.result``. Returns a dict with
    prompt/response/total token counts, or None if nothing is found or an
    error occurs (never raises).
    """
    try:
        # Method 1: Check usage_metadata attribute
        if hasattr(response, 'usage_metadata') and response.usage_metadata:
            print(f"📊 Found usage_metadata:")
            print(f" - Response object: {response.usage_metadata}")
            return {
                'prompt_tokens': getattr(response.usage_metadata, 'prompt_token_count', 0),
                'response_tokens': getattr(response.usage_metadata, 'candidates_token_count', 0),
                'total_tokens': getattr(response.usage_metadata, 'total_token_count', 0)
            }

        # Method 2: Check if it's in the result
        if hasattr(response, 'result') and response.result:
            result_dict = response.result.to_dict() if hasattr(response.result, 'to_dict') else {}
            print(f"📊 Checking result dict: {result_dict.keys() if isinstance(result_dict, dict) else 'Not a dict'}")

            if 'usage_metadata' in result_dict:
                usage = result_dict['usage_metadata']
                return {
                    'prompt_tokens': usage.get('prompt_token_count', 0),
                    'response_tokens': usage.get('candidates_token_count', 0),
                    'total_tokens': usage.get('total_token_count', 0)
                }

        # Method 3: Check candidates for token_count
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]
            if hasattr(candidate, 'token_count'):
                print(f"📊 Found token_count in candidate: {candidate.token_count}")
                # This might not give us the breakdown, but it's something:
                # only the candidate's own count is known, so prompt tokens
                # are reported as 0.
                return {
                    'prompt_tokens': 0,  # Not available separately
                    'response_tokens': candidate.token_count,
                    'total_tokens': candidate.token_count
                }

        # Method 4: Try to access through the internal result
        if hasattr(response, 'result') and hasattr(response.result, 'candidates'):
            candidates = response.result.candidates
            if candidates and len(candidates) > 0:
                candidate = candidates[0]
                if hasattr(candidate, 'token_count'):
                    return {
                        'prompt_tokens': 0,
                        'response_tokens': candidate.token_count,
                        'total_tokens': candidate.token_count
                    }

        print("❌ No token usage found in any expected location")
        return None

    except Exception as e:
        print(f"❌ Error extracting token usage: {e}")
        return None
backend/vnc_proxy.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/vnc_proxy.py
2
+ import asyncio
3
+ import websockets
4
+ import socket
5
+ import logging
6
+ from typing import Optional
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class VNCWebSocketProxy:
    """Bidirectional proxy between a WebSocket client (e.g. noVNC) and a raw
    VNC/RFB TCP server.

    One instance serves a single VNC endpoint; every incoming WebSocket
    connection gets its own TCP connection to that VNC server, with two pump
    coroutines copying bytes in each direction.
    """

    def __init__(self, vnc_host: str = "localhost", vnc_port: int = 5901):
        # Target VNC server endpoint.
        self.vnc_host = vnc_host
        self.vnc_port = vnc_port
        # websockets server handle; None until start_proxy() succeeds.
        self.server = None

    async def start_proxy(self, websocket_port: int):
        """Start the WebSocket to VNC proxy server.

        Returns True on success, False if the server could not be started.
        """
        try:
            self.server = await websockets.serve(
                self.handle_websocket,
                "localhost",
                websocket_port
            )
            logger.info(f"VNC WebSocket proxy started on port {websocket_port}")
            return True
        except Exception as e:
            logger.error(f"Failed to start VNC proxy: {e}")
            return False

    async def stop_proxy(self):
        """Stop the proxy server and wait for it to close."""
        if self.server:
            self.server.close()
            await self.server.wait_closed()

    async def handle_websocket(self, websocket, path=None):
        """Handle one WebSocket connection and proxy it to the VNC server.

        ``path`` is optional for compatibility with both old (handler(ws, path))
        and new (handler(ws)) calling conventions of the websockets library.
        """
        vnc_socket = None
        try:
            # Connect to the VNC server without blocking the event loop:
            # the previous blocking connect() stalled every other connection.
            loop = asyncio.get_running_loop()
            vnc_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            vnc_socket.setblocking(False)
            await loop.sock_connect(vnc_socket, (self.vnc_host, self.vnc_port))

            logger.info(f"Connected to VNC server at {self.vnc_host}:{self.vnc_port}")

            # Create tasks for bidirectional communication
            ws_to_vnc_task = asyncio.create_task(
                self.websocket_to_vnc(websocket, vnc_socket)
            )
            vnc_to_ws_task = asyncio.create_task(
                self.vnc_to_websocket(vnc_socket, websocket)
            )

            # Stop as soon as EITHER direction finishes (disconnection).
            # gather() waited for BOTH, leaving the surviving pump blocked
            # forever on a dead connection.
            done, pending = await asyncio.wait(
                {ws_to_vnc_task, vnc_to_ws_task},
                return_when=asyncio.FIRST_COMPLETED,
            )
            for task in pending:
                task.cancel()
            # Let the cancellation settle; swallow CancelledError.
            await asyncio.gather(*pending, return_exceptions=True)

        except Exception as e:
            logger.error(f"Error in VNC proxy: {e}")
        finally:
            if vnc_socket:
                vnc_socket.close()

    async def websocket_to_vnc(self, websocket, vnc_socket):
        """Forward binary WebSocket frames to the VNC TCP socket."""
        try:
            async for message in websocket:
                # Only binary frames carry RFB data; ignore text frames.
                if isinstance(message, bytes):
                    await asyncio.get_running_loop().sock_sendall(vnc_socket, message)
        except websockets.exceptions.ConnectionClosed:
            pass
        except Exception as e:
            logger.error(f"Error forwarding WebSocket to VNC: {e}")

    async def vnc_to_websocket(self, vnc_socket, websocket):
        """Forward VNC TCP data to the WebSocket as binary frames."""
        try:
            while True:
                data = await asyncio.get_running_loop().sock_recv(vnc_socket, 4096)
                if not data:  # EOF: VNC server closed the connection
                    break
                await websocket.send(data)
        except websockets.exceptions.ConnectionClosed:
            pass
        except Exception as e:
            logger.error(f"Error forwarding VNC to WebSocket: {e}")
87
+
88
# Global proxy manager: maps WebSocket port -> running VNCWebSocketProxy.
vnc_proxies = {}

async def start_vnc_proxy(vnc_port: int) -> Optional[int]:
    """Start (or reuse) a WebSocket proxy for the given VNC port.

    The WebSocket port is derived as ``vnc_port + 1000``. Returns that port
    on success (or when a proxy already listens there), None on failure.
    """
    ws_port = vnc_port + 1000  # Offset for WebSocket port

    # Already running? Reuse the existing proxy.
    if ws_port in vnc_proxies:
        return ws_port

    proxy = VNCWebSocketProxy("localhost", vnc_port)
    if not await proxy.start_proxy(ws_port):
        return None
    vnc_proxies[ws_port] = proxy
    return ws_port

async def stop_vnc_proxy(websocket_port: int):
    """Stop and forget the proxy listening on *websocket_port*, if any."""
    if websocket_port not in vnc_proxies:
        return
    await vnc_proxies[websocket_port].stop_proxy()
    del vnc_proxies[websocket_port]