GitHub Copilot committed on
Commit
ad71e2d
·
1 Parent(s): 41a5ff3

Feature: Add BrowserAutomationConnector with smolagents integration

Browse files
Files changed (2) hide show
  1. logos/connectors.py +133 -2
  2. requirements.txt +3 -0
logos/connectors.py CHANGED
@@ -299,6 +299,130 @@ class NeMoAgentConnector:
299
  return f"[Vision Error] {e}"
300
 
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  # ==========================================
303
  # FACTORY
304
  # ==========================================
@@ -308,7 +432,7 @@ def get_connector(connector_type: str, **kwargs) -> Any:
308
  Factory function for connectors.
309
 
310
  Args:
311
- connector_type: One of 'hf', 'ocr', 'vision', 'nemo'
312
  **kwargs: Connector-specific arguments
313
 
314
  Returns:
@@ -318,7 +442,8 @@ def get_connector(connector_type: str, **kwargs) -> Any:
318
  'hf': HuggingFaceConnector,
319
  'ocr': OCRConnector,
320
  'vision': VisionConnector,
321
- 'nemo': NeMoAgentConnector
 
322
  }
323
 
324
  if connector_type not in connectors:
@@ -355,5 +480,11 @@ AVAILABLE_CONNECTORS = {
355
  'capabilities': ['chat', 'analyze_diagram', 'react_agent', 'tool_calling'],
356
  'requires': ['nvidia-nat', 'openai'],
357
  'env_vars': ['NVIDIA_API_KEY']
 
 
 
 
 
 
358
  }
359
  }
 
299
  return f"[Vision Error] {e}"
300
 
301
 
302
+ # ==========================================
303
+ # BROWSER AUTOMATION CONNECTOR (smolagents)
304
+ # ==========================================
305
+
306
+ class BrowserAutomationConnector:
307
+ """
308
+ Adapter for HuggingFace smolagents + helium for browser automation.
309
+ Enables autonomous web navigation, form filling, and data extraction.
310
+ """
311
+
312
+ def __init__(self, headless: bool = True):
313
+ self.headless = headless
314
+ self._driver = None
315
+ self._agent = None
316
+
317
+ def _ensure_browser(self):
318
+ """Initialize browser with helium."""
319
+ if self._driver is None:
320
+ try:
321
+ from helium import start_chrome, go_to, click, write, get_driver
322
+ from selenium.webdriver.chrome.options import Options
323
+
324
+ options = Options()
325
+ if self.headless:
326
+ options.add_argument('--headless')
327
+ options.add_argument('--no-sandbox')
328
+ options.add_argument('--disable-dev-shm-usage')
329
+
330
+ start_chrome(headless=self.headless)
331
+ self._driver = get_driver()
332
+ except ImportError:
333
+ raise ImportError("helium/selenium not installed. Run: pip install helium selenium")
334
+ return self._driver
335
+
336
+ def navigate(self, url: str) -> str:
337
+ """Navigate to a URL and return page title."""
338
+ try:
339
+ from helium import go_to, get_driver
340
+ self._ensure_browser()
341
+ go_to(url)
342
+ return f"Navigated to: {get_driver().title}"
343
+ except Exception as e:
344
+ return f"[Navigation Error] {e}"
345
+
346
+ def search_page(self, query: str) -> str:
347
+ """Search for text on current page."""
348
+ try:
349
+ driver = self._ensure_browser()
350
+ page_source = driver.page_source.lower()
351
+ if query.lower() in page_source:
352
+ return f"Found '{query}' on page"
353
+ else:
354
+ return f"'{query}' not found on page"
355
+ except Exception as e:
356
+ return f"[Search Error] {e}"
357
+
358
+ def get_page_text(self) -> str:
359
+ """Extract visible text from current page."""
360
+ try:
361
+ driver = self._ensure_browser()
362
+ from bs4 import BeautifulSoup
363
+ soup = BeautifulSoup(driver.page_source, 'html.parser')
364
+ text = soup.get_text(separator=' ', strip=True)
365
+ return text[:5000] # Limit to 5k chars
366
+ except Exception as e:
367
+ return f"[Extraction Error] {e}"
368
+
369
+ def click_element(self, text: str) -> str:
370
+ """Click an element by its text."""
371
+ try:
372
+ from helium import click
373
+ self._ensure_browser()
374
+ click(text)
375
+ return f"Clicked: {text}"
376
+ except Exception as e:
377
+ return f"[Click Error] {e}"
378
+
379
+ def type_text(self, field: str, text: str) -> str:
380
+ """Type text into a field."""
381
+ try:
382
+ from helium import write
383
+ self._ensure_browser()
384
+ write(text, into=field)
385
+ return f"Typed into {field}"
386
+ except Exception as e:
387
+ return f"[Type Error] {e}"
388
+
389
+ def close(self):
390
+ """Close the browser."""
391
+ try:
392
+ from helium import kill_browser
393
+ kill_browser()
394
+ self._driver = None
395
+ except:
396
+ pass
397
+
398
+ def run_agent_task(self, task: str, model: str = "Qwen/Qwen2.5-Coder-32B-Instruct") -> str:
399
+ """
400
+ Run a browser automation task using smolagents Code Agent.
401
+
402
+ Args:
403
+ task: Natural language task description
404
+ model: HF model for the code agent
405
+
406
+ Returns:
407
+ Task result
408
+ """
409
+ try:
410
+ from smolagents import CodeAgent, HfApiModel
411
+ from smolagents.tools import Tool
412
+
413
+ # Create agent with browser tools
414
+ model_instance = HfApiModel(model)
415
+ agent = CodeAgent(tools=[], model=model_instance)
416
+
417
+ # Run the task
418
+ result = agent.run(task)
419
+ return str(result)
420
+ except ImportError:
421
+ return "[Error] smolagents not installed. Run: pip install smolagents"
422
+ except Exception as e:
423
+ return f"[Agent Error] {e}"
424
+
425
+
426
  # ==========================================
427
  # FACTORY
428
  # ==========================================
 
432
  Factory function for connectors.
433
 
434
  Args:
435
+ connector_type: One of 'hf', 'ocr', 'vision', 'nemo', 'browser'
436
  **kwargs: Connector-specific arguments
437
 
438
  Returns:
 
442
  'hf': HuggingFaceConnector,
443
  'ocr': OCRConnector,
444
  'vision': VisionConnector,
445
+ 'nemo': NeMoAgentConnector,
446
+ 'browser': BrowserAutomationConnector
447
  }
448
 
449
  if connector_type not in connectors:
 
480
  'capabilities': ['chat', 'analyze_diagram', 'react_agent', 'tool_calling'],
481
  'requires': ['nvidia-nat', 'openai'],
482
  'env_vars': ['NVIDIA_API_KEY']
483
+ },
484
+ 'browser': {
485
+ 'name': 'Browser Automation (smolagents)',
486
+ 'capabilities': ['navigate', 'click_element', 'type_text', 'get_page_text', 'run_agent_task'],
487
+ 'requires': ['smolagents', 'helium', 'selenium', 'beautifulsoup4'],
488
+ 'env_vars': []
489
  }
490
  }
requirements.txt CHANGED
@@ -8,3 +8,6 @@ plotly
8
  sympy
9
  easyocr
10
  nvidia-nat
 
 
 
 
8
  sympy
9
  easyocr
10
  nvidia-nat
11
+ smolagents
12
+ helium
13
+ beautifulsoup4