File size: 10,387 Bytes
6077105
 
1c35a0c
6077105
1c35a0c
 
6077105
 
 
1c35a0c
 
 
 
 
 
 
 
 
 
 
6077105
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6077105
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6077105
 
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
 
6077105
 
1c35a0c
6077105
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6077105
 
1c35a0c
 
 
 
 
 
 
 
 
 
 
6077105
7dd8e08
1c35a0c
7dd8e08
1c35a0c
 
 
 
 
 
 
7dd8e08
 
1c35a0c
 
 
 
 
 
 
 
 
 
 
 
 
7dd8e08
 
1c35a0c
7dd8e08
1c35a0c
 
 
 
 
 
 
 
7dd8e08
b2acf55
 
 
 
 
8985f34
 
 
 
 
 
6077105
 
 
 
 
 
 
 
 
 
7dd8e08
 
 
b2acf55
8985f34
 
6077105
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import json
import logging
from typing import Optional, Dict, Any, List
from langchain_core.tools import tool
from patchright.async_api import async_playwright, Browser, BrowserContext, Page
import asyncio

logger = logging.getLogger(__name__)

# Global state for a persistent browser session.
# NOTE: In a production API with multiple workers, browser state should be managed per session/request.
# Session-specific browsers would require a session_id passed in via context, which the tool
# interface does not easily provide, so we default to a singleton suited to single-user/CLI usage.
class BrowserManager:
    """Class-level holder for one shared Playwright driver, browser, context and page.

    All state is stored on the class itself and every method is a classmethod,
    so callers never instantiate it. Access to the shared state is serialized
    through ``_lock``.
    """

    _instance = None
    _playwright = None
    _browser = None
    _context = None
    _page = None
    _lock = asyncio.Lock()

    @classmethod
    async def get_page(cls, headless: bool = True) -> Page:
        """Return the shared page, lazily creating whatever part of the stack is missing.

        Args:
            headless: Forwarded to ``chromium.launch`` only when a browser has
                to be launched by this call; it has no effect once a browser
                is already stored on the class.

        Returns:
            The shared page. If the stored page has been closed, a new page is
            opened in the existing context and returned instead.
        """
        async with cls._lock:
            if cls._playwright is None:
                cls._playwright = await async_playwright().start()

            if cls._browser is None:
                # Use Scrapling-compatible browser launch if needed, or standard playwright
                # Using standard playwright for tools to ensure full interactivity
                cls._browser = await cls._playwright.chromium.launch(headless=headless)

            if cls._context is None:
                cls._context = await cls._browser.new_context(
                    viewport={'width': 1280, 'height': 800},
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
                )

            # A page that was closed (e.g. by a script calling window.close())
            # is unusable; replace it rather than handing back a dead handle.
            if cls._page is None or cls._page.is_closed():
                cls._page = await cls._context.new_page()

            return cls._page

    @classmethod
    async def close(cls):
        """Tear down the context, browser and driver, and reset all stored state.

        References are cleared before anything is closed, and every teardown
        step runs even if an earlier one raises, so a failed close never leaves
        stale handles behind for the next ``get_page`` call. An exception
        raised while closing still propagates to the caller.
        """
        async with cls._lock:
            context, browser, playwright = cls._context, cls._browser, cls._playwright
            cls._page = None
            cls._context = None
            cls._browser = None
            cls._playwright = None

            try:
                if context:
                    await context.close()
            finally:
                try:
                    if browser:
                        await browser.close()
                finally:
                    if playwright:
                        await playwright.stop()

# Helper to run async code synchronously for tool execution
def run_sync(coro):
    """Run *coro* to completion from synchronous code and return its result.

    The current event loop is reused when one exists and is still open.
    If there is no current loop, or the current loop has been closed, a new
    loop is created and installed as the current one.

    When the loop is already running (e.g. the caller is itself inside an
    async framework), ``nest_asyncio`` is applied so that
    ``run_until_complete`` can be re-entered; that package must be installed
    for this path, otherwise the import raises ``ImportError``.

    Args:
        coro: The coroutine (or other awaitable accepted by
            ``loop.run_until_complete``) to execute.

    Returns:
        Whatever *coro* returns.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None

    # A closed loop cannot run anything; replace it instead of failing with
    # "Event loop is closed".
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    if loop.is_running():
        # We are already inside a running loop and cannot block on it normally.
        # nest_asyncio patches the loop to allow a nested run_until_complete.
        import nest_asyncio
        nest_asyncio.apply()

    return loop.run_until_complete(coro)

@tool
def browse_and_extract(url: str, selector: str = "body", use_persistent: bool = True) -> str:
    """Browse to a URL and extract text content from the specified CSS selector."""
    # The docstring above is kept verbatim: LangChain's @tool exposes it as the
    # tool description. `use_persistent` is accepted but not read in this body;
    # the shared BrowserManager page is always used.
    async def _action():
        try:
            page = await BrowserManager.get_page()
            await page.goto(url, wait_until="domcontentloaded")
            # Wait for selector if specific one provided
            if selector != "body":
                try:
                    await page.wait_for_selector(selector, timeout=5000)
                except Exception:
                    # Best effort only: fall through to query_selector, which
                    # reports "not found" below. Catching Exception (not a bare
                    # except) lets task cancellation and KeyboardInterrupt
                    # propagate instead of being silently swallowed.
                    pass

            element = await page.query_selector(selector)
            if element:
                text = await element.inner_text()
                return text
            return f"Element '{selector}' not found."
        except Exception as e:
            # Errors are returned as text rather than raised.
            return f"Error: {str(e)}"

    return run_sync(_action())

@tool
def click_element(url: str, selector: str, use_persistent: bool = True) -> str:
    """Click an element on the page identified by the CSS selector. URL is ignored if persistent session is active."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `use_persistent` is accepted but not read in this body.
    async def _click() -> str:
        try:
            current_page = await BrowserManager.get_page()
            # Navigation happens only when the shared page is still blank and a
            # URL was supplied; otherwise the click targets the page as it is.
            on_blank_page_with_url = current_page.url == "about:blank" and url
            if on_blank_page_with_url:
                await current_page.goto(url)

            await current_page.click(selector)
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"
        else:
            return "Clicked element."

    return run_sync(_click())

@tool
def fill_field(url: str, selector: str, text: str, use_persistent: bool = True) -> str:
    """Fill a text field or form element identified by the CSS selector with the provided text."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here;
    # the field is filled on whatever the shared page currently shows.
    async def _fill() -> str:
        try:
            current_page = await BrowserManager.get_page()
            await current_page.fill(selector, text)
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"
        else:
            return f"Filled '{selector}' with text."

    return run_sync(_fill())

@tool
def execute_javascript(url: str, script: str, use_persistent: bool = True) -> str:
    """Execute custom JavaScript on the page and return the result."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _evaluate() -> str:
        try:
            current_page = await BrowserManager.get_page()
            outcome = await current_page.evaluate(script)
            # The evaluation result is rendered with str(), whatever its type.
            rendered = str(outcome)
            return rendered
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"

    return run_sync(_evaluate())

@tool
def get_cookies(url: str, use_persistent: bool = True) -> str:
    """Get all cookies for the current domain in JSON format."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here;
    # cookies() is called on the page's context without any URL filter.
    async def _read_cookies() -> str:
        try:
            current_page = await BrowserManager.get_page()
            browser_context = current_page.context
            cookie_list = await browser_context.cookies()
            serialized = json.dumps(cookie_list)
            return serialized
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"

    return run_sync(_read_cookies())

@tool
def set_cookies(url: str, cookies_json: str, use_persistent: bool = True) -> str:
    """Set cookies on the page from a JSON string."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _write_cookies() -> str:
        try:
            current_page = await BrowserManager.get_page()
            # The parsed JSON is handed to add_cookies as-is; malformed JSON or
            # a rejected cookie shape ends up in the error branch below.
            parsed_cookies = json.loads(cookies_json)
            browser_context = current_page.context
            await browser_context.add_cookies(parsed_cookies)
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"
        else:
            return "Cookies set."

    return run_sync(_write_cookies())

@tool
def scroll_page(url: str, direction: str = "bottom", pixels: float = 500, use_persistent: bool = True) -> str:
    """Scroll the page in a specified direction ('bottom', 'top', 'down', 'up')."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _action():
        # Reject unsupported directions up front instead of reporting a scroll
        # that never happened.
        if direction not in ("bottom", "top", "down", "up"):
            return f"Error: Unknown direction '{direction}'. Use 'bottom', 'top', 'down' or 'up'."
        try:
            page = await BrowserManager.get_page()
            if direction == "bottom":
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            elif direction == "top":
                await page.evaluate("window.scrollTo(0, 0)")
            elif direction == "down":
                # The offset is passed as an argument rather than formatted
                # into the script, so any numeric value (including negatives)
                # yields valid JavaScript.
                await page.evaluate("(dy) => window.scrollBy(0, dy)", pixels)
            else:  # "up"
                await page.evaluate("(dy) => window.scrollBy(0, dy)", -pixels)
            return f"Scrolled {direction}."
        except Exception as e:
            # Failures are reported as text rather than raised.
            return f"Error: {str(e)}"
    return run_sync(_action())

@tool
def take_screenshot(url: str, full_page: bool = False, use_persistent: bool = True) -> str:
    """Take a screenshot of the current page and return base64 string."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _capture() -> str:
        import base64

        try:
            current_page = await BrowserManager.get_page()
            image_bytes = await current_page.screenshot(full_page=full_page)
            encoded = base64.b64encode(image_bytes).decode()
            # Only the first 100 characters of the encoding are returned.
            preview = encoded[:100]
            return f"Screenshot captured (base64): {preview}..."
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"

    return run_sync(_capture())

@tool
def get_html_source(url: str, use_persistent: bool = True) -> str:
    """Get the full HTML source code of the current page."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _read_html() -> str:
        try:
            current_page = await BrowserManager.get_page()
            markup = await current_page.content()
            return markup
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"

    return run_sync(_read_html())

@tool
def get_page_info(url: str, use_persistent: bool = True) -> str:
    """Get comprehensive page information including title, URL."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _describe() -> str:
        try:
            current_page = await BrowserManager.get_page()
            page_title = await current_page.title()
            page_url = current_page.url
            markup = await current_page.content()
            # The preview is the first 500 characters of the page markup.
            summary = {
                "title": page_title,
                "url": page_url,
                "content_preview": markup[:500],
            }
            return json.dumps(summary)
        except Exception as exc:
            # Failures are reported as text rather than raised.
            return f"Error: {str(exc)}"

    return run_sync(_describe())

@tool
def wait_for_element(url: str, selector: str, timeout: float = 10, use_persistent: bool = True) -> str:
    """Wait for an element matching the CSS selector to appear on the page."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. `url` and `use_persistent` are accepted but not read here.
    async def _wait() -> str:
        try:
            current_page = await BrowserManager.get_page()
            # `timeout` is multiplied by 1000 before being handed to
            # wait_for_selector (seconds in, milliseconds out).
            timeout_ms = timeout * 1000
            await current_page.wait_for_selector(selector, timeout=timeout_ms)
        except Exception as exc:
            # Failures, including a timeout, are reported as text.
            return f"Error: {str(exc)}"
        else:
            return f"Element '{selector}' appeared."

    return run_sync(_wait())

@tool
def task_complete(reason: str) -> str:
    """Call this tool when you have successfully completed the task and have the final data or answer."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. The function only echoes `reason` behind a fixed marker.
    completion_message = f"TASK COMPLETE: {reason}"
    return completion_message

@tool
def agent_reflection(thought: str, adaptation_plan: str) -> str:
    """Call this tool to reflect on your progress, especially after an error or unexpected result.
    Explain what you've learned and how you're adapting your strategy."""
    # Docstring kept verbatim: LangChain's @tool exposes it as the tool
    # description. The function only formats its two arguments into a
    # two-line string; it touches no browser state.
    reflection_message = f"REFLECTION: {thought}\nADAPTATION PLAN: {adaptation_plan}"
    return reflection_message

def get_all_browser_tools():
    """Build and return a new list of every tool defined in this module.

    The page-interaction tools come first, followed by the two control tools
    (`task_complete`, `agent_reflection`). A fresh list is created on each call.
    """
    page_tools = (
        browse_and_extract,
        click_element,
        fill_field,
        execute_javascript,
        get_cookies,
        set_cookies,
        scroll_page,
        take_screenshot,
        get_html_source,
        get_page_info,
        wait_for_element,
    )
    control_tools = (task_complete, agent_reflection)
    return [*page_tools, *control_tools]