# Scraper_hub / src / utils / browser_tools.py
# Uploaded via huggingface_hub by AUXteam (commit 1c35a0c, verified).
import json
import logging
from typing import Optional, Dict, Any, List
from langchain_core.tools import tool
from patchright.async_api import async_playwright, Browser, BrowserContext, Page
import asyncio
logger = logging.getLogger(__name__)
# Global state for persistent browser session
# Note: In a production API with multiple workers, this should be managed per-session/request.
# For now, we use a simple mechanism to store session-specific browsers if session_id is provided via context,
# but since tools interface doesn't easily pass that, we default to a singleton for single-user/cli usage.
class BrowserManager:
    """Process-wide singleton holding one persistent Playwright session.

    All tools share a single page so that navigation, cookies, and form
    state persist across tool calls. In a production API with multiple
    workers this should become per-session state (see module comment);
    for CLI/single-user usage a singleton is sufficient.
    """

    _instance = None      # reserved for future per-session management; currently unused
    _playwright = None    # playwright driver handle
    _browser = None       # launched Chromium instance
    _context = None       # browser context (cookies, viewport, UA)
    _page = None          # the single shared page
    _lock = asyncio.Lock()  # serializes lazy initialization across tasks

    @classmethod
    async def get_page(cls, headless: bool = True) -> Page:
        """Return the shared Page, lazily creating any missing layer.

        Note: ``headless`` is only honored when the browser is first
        launched; subsequent calls reuse the existing browser regardless.

        Fix: previously a page or browser closed out-of-band left a dead
        handle cached forever; each layer is now re-created when it (or
        its parent) is no longer alive.
        """
        async with cls._lock:
            if cls._playwright is None:
                cls._playwright = await async_playwright().start()
            if cls._browser is None or not cls._browser.is_connected():
                # Using standard playwright launch for tools to ensure full
                # interactivity (rather than a Scrapling-specific launch).
                cls._browser = await cls._playwright.chromium.launch(headless=headless)
                # A fresh browser invalidates any context/page from the old one.
                cls._context = None
                cls._page = None
            if cls._context is None:
                cls._context = await cls._browser.new_context(
                    viewport={'width': 1280, 'height': 800},
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
                )
                cls._page = None
            if cls._page is None or cls._page.is_closed():
                cls._page = await cls._context.new_page()
            return cls._page

    @classmethod
    async def close(cls):
        """Tear down page, context, browser, and driver (idempotent)."""
        async with cls._lock:
            if cls._context:
                await cls._context.close()
                cls._context = None
                cls._page = None
            if cls._browser:
                await cls._browser.close()
                cls._browser = None
            if cls._playwright:
                await cls._playwright.stop()
                cls._playwright = None
# Helper to run async code synchronously for tool execution.
_sync_loop = None  # persistent private loop, reused so loop-bound browser state survives calls


def run_sync(coro):
    """Drive *coro* to completion from synchronous code and return its result.

    Two situations are handled:

    * No event loop is running (CLI usage): a private loop is created once
      and reused for every call, so objects bound to it (the persistent
      browser session) remain usable across tool invocations.
    * A loop is already running (e.g. inside FastAPI): blocking on it is
      normally illegal, so ``nest_asyncio`` is applied to permit re-entrant
      ``run_until_complete``. Previously a missing ``nest_asyncio`` surfaced
      as a bare ImportError; now a RuntimeError explains the remedy.

    Raises:
        RuntimeError: called from a running loop without ``nest_asyncio``.
    """
    global _sync_loop
    try:
        running = asyncio.get_running_loop()
    except RuntimeError:
        running = None
    if running is not None:
        try:
            import nest_asyncio
        except ImportError as exc:
            raise RuntimeError(
                "run_sync() was called from inside a running event loop; "
                "install 'nest_asyncio' or await the coroutine directly."
            ) from exc
        nest_asyncio.apply()
        return running.run_until_complete(coro)
    if _sync_loop is None or _sync_loop.is_closed():
        _sync_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(_sync_loop)
    return _sync_loop.run_until_complete(coro)
@tool
def browse_and_extract(url: str, selector: str = "body", use_persistent: bool = True) -> str:
    """Browse to a URL and extract text content from the specified CSS selector."""
    async def _action():
        try:
            page = await BrowserManager.get_page()
            await page.goto(url, wait_until="domcontentloaded")
            # Give a specific selector a short grace period to appear.
            if selector != "body":
                try:
                    await page.wait_for_selector(selector, timeout=5000)
                except Exception:
                    # Fix: was a bare `except:` that also swallowed
                    # KeyboardInterrupt / asyncio.CancelledError.
                    # Best-effort: fall through and query whatever is present.
                    pass
            element = await page.query_selector(selector)
            if element:
                return await element.inner_text()
            return f"Element '{selector}' not found."
        except Exception as e:
            return f"Error: {str(e)}"
    return run_sync(_action())
@tool
def click_element(url: str, selector: str, use_persistent: bool = True) -> str:
    """Click an element on the page identified by the CSS selector. URL is ignored if persistent session is active."""
    async def _action():
        try:
            page = await BrowserManager.get_page()
            # Navigate only when the persistent page has never loaded anything;
            # otherwise stay on whatever page the session is currently showing.
            needs_navigation = page.url == "about:blank" and bool(url)
            if needs_navigation:
                await page.goto(url)
            await page.click(selector)
            return "Clicked element."
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def fill_field(url: str, selector: str, text: str, use_persistent: bool = True) -> str:
    """Fill a text field or form element identified by the CSS selector with the provided text."""
    async def _action():
        try:
            # fill() clears the field first, then types the new value.
            active_page = await BrowserManager.get_page()
            await active_page.fill(selector, text)
            return f"Filled '{selector}' with text."
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def execute_javascript(url: str, script: str, use_persistent: bool = True) -> str:
    """Execute custom JavaScript on the page and return the result."""
    async def _action():
        try:
            # Evaluate in the page context; non-string results are stringified.
            current = await BrowserManager.get_page()
            return str(await current.evaluate(script))
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def get_cookies(url: str, use_persistent: bool = True) -> str:
    """Get all cookies for the current domain in JSON format."""
    async def _action():
        try:
            # Cookies live on the context, reached through the shared page.
            active_page = await BrowserManager.get_page()
            all_cookies = await active_page.context.cookies()
            return json.dumps(all_cookies)
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def set_cookies(url: str, cookies_json: str, use_persistent: bool = True) -> str:
    """Set cookies on the page from a JSON string."""
    async def _action():
        try:
            active_page = await BrowserManager.get_page()
            # Parse the JSON payload and install the cookies on the context.
            parsed = json.loads(cookies_json)
            await active_page.context.add_cookies(parsed)
            return "Cookies set."
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def scroll_page(url: str, direction: str = "bottom", pixels: float = 500, use_persistent: bool = True) -> str:
    """Scroll the page in a specified direction ('bottom', 'top', 'down', 'up')."""
    async def _action():
        try:
            page = await BrowserManager.get_page()
            # Dispatch table replaces the if/elif chain; 'pixels' only applies
            # to the relative 'down'/'up' moves.
            scripts = {
                "bottom": "window.scrollTo(0, document.body.scrollHeight)",
                "top": "window.scrollTo(0, 0)",
                "down": f"window.scrollBy(0, {pixels})",
                "up": f"window.scrollBy(0, -{pixels})",
            }
            script = scripts.get(direction)
            if script is None:
                # Fix: an unknown direction previously reported success
                # ("Scrolled ...") without scrolling at all.
                return f"Error: unknown direction '{direction}'. Use 'bottom', 'top', 'down' or 'up'."
            await page.evaluate(script)
            return f"Scrolled {direction}."
        except Exception as e:
            return f"Error: {str(e)}"
    return run_sync(_action())
@tool
def take_screenshot(url: str, full_page: bool = False, use_persistent: bool = True) -> str:
    """Take a screenshot of the current page and return base64 string."""
    async def _action():
        try:
            import base64
            page = await BrowserManager.get_page()
            raw = await page.screenshot(full_page=full_page)
            # Truncate the base64 payload so the tool output stays small.
            encoded = base64.b64encode(raw).decode()
            return f"Screenshot captured (base64): {encoded[:100]}..."
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def get_html_source(url: str, use_persistent: bool = True) -> str:
    """Get the full HTML source code of the current page."""
    async def _action():
        try:
            current = await BrowserManager.get_page()
            html = await current.content()
            return html
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def get_page_info(url: str, use_persistent: bool = True) -> str:
    """Get comprehensive page information including title, URL."""
    async def _action():
        try:
            page = await BrowserManager.get_page()
            # Assemble the summary first, then serialize in one step.
            info = {
                "title": await page.title(),
                "url": page.url,
                "content_preview": (await page.content())[:500]
            }
            return json.dumps(info)
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def wait_for_element(url: str, selector: str, timeout: float = 10, use_persistent: bool = True) -> str:
    """Wait for an element matching the CSS selector to appear on the page."""
    async def _action():
        try:
            page = await BrowserManager.get_page()
            # Playwright expects milliseconds; the tool takes seconds.
            timeout_ms = timeout * 1000
            await page.wait_for_selector(selector, timeout=timeout_ms)
            return f"Element '{selector}' appeared."
        except Exception as exc:
            return f"Error: {str(exc)}"
    return run_sync(_action())
@tool
def task_complete(reason: str) -> str:
    """Call this tool when you have successfully completed the task and have the final data or answer."""
    # Sentinel output the agent loop can detect to stop iterating.
    return "TASK COMPLETE: " + reason
@tool
def agent_reflection(thought: str, adaptation_plan: str) -> str:
    """Call this tool to reflect on your progress, especially after an error or unexpected result.
    Explain what you've learned and how you're adapting your strategy."""
    # Echo the reflection back so it lands in the conversation transcript.
    return "REFLECTION: " + thought + "\nADAPTATION PLAN: " + adaptation_plan
def get_all_browser_tools():
    """Returns a list of all browser automation tools."""
    # Browser interaction tools followed by the agent meta-tools
    # (task_complete / agent_reflection).
    toolbox = [
        browse_and_extract,
        click_element,
        fill_field,
        execute_javascript,
        get_cookies,
        set_cookies,
        scroll_page,
        take_screenshot,
        get_html_source,
        get_page_info,
        wait_for_element,
        task_complete,
        agent_reflection,
    ]
    return toolbox