google-labs-jules[bot] Greene-ctrl committed on
Commit
6077105
·
1 Parent(s): f43283a

Enhance CyberScraper 2077 with browser automation tools for captcha bypass

Browse files

Summary of changes:
- Integrated browser automation tools (click, fill, cookies, JS execution) using `gradio_client` to connect to `diamond-in/Browser-Use-mcp`.
- Updated `WebExtractor` with a tool-calling loop, enabling AI to autonomously navigate and handle blocks.
- Added `gradio_client` to `requirements.txt`.
- Provided a suite of LangChain-compatible tools in `src/utils/browser_tools.py`.
- Maintained all previous deployment configurations for Hugging Face Spaces (Nginx, FastAPI, Blablador LLM).
- Verified implementation locally and on the live Hugging Face Space.

Co-authored-by: Greene-ctrl <192867433+Greene-ctrl@users.noreply.github.com>

requirements.txt CHANGED
@@ -39,3 +39,6 @@ google-api-python-client==2.188.0
39
 
40
  # Tor support
41
  PySocks==1.7.1
 
 
 
 
39
 
40
  # Tor support
41
  PySocks==1.7.1
42
+
43
+ # Browser-Use API
44
+ gradio_client
src/utils/browser_tools.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from typing import Optional, Dict, Any
4
+ from gradio_client import Client
5
+ from langchain_core.tools import tool
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Use the Hugging Face Space provided in the prompt
10
+ MCP_BROWSER_SPACE = "diamond-in/Browser-Use-mcp"
11
+
12
# Module-level cache: constructing a Client performs a network handshake
# with the Space, so reuse one instance across tool calls.
_browser_client: Optional[Client] = None


def get_browser_client():
    """Return a Gradio client for the browser-automation Space, or ``None``.

    The client is memoized after the first successful connection so repeated
    tool invocations do not re-handshake with the Space.  Failures are NOT
    cached: a later call retries the connection.
    """
    global _browser_client
    if _browser_client is None:
        try:
            _browser_client = Client(MCP_BROWSER_SPACE)
        except Exception as e:
            logger.error(f"Failed to initialize Gradio client: {e}")
            return None
    return _browser_client
18
+
19
@tool
def browse_and_extract(url: str, selector: str = "body", use_persistent: bool = False) -> str:
    """Browse to a URL and extract text content from the specified CSS selector."""
    backend = get_browser_client()
    if backend is None:
        return "Error: Browser client unavailable."
    try:
        # Delegate the actual navigation/extraction to the remote Space.
        extracted = backend.predict(
            url=url,
            selector=selector,
            use_persistent=use_persistent,
            api_name="/browse_and_extract",
        )
        return str(extracted)
    except Exception as e:
        return f"Error during browse_and_extract: {str(e)}"
34
+
35
@tool
def click_element(url: str, selector: str, use_persistent: bool = False) -> str:
    """Click an element on the page identified by the CSS selector."""
    remote = get_browser_client()
    if remote is None:
        return "Error: Browser client unavailable."
    try:
        # The remote Space performs the click and reports the outcome.
        outcome = remote.predict(
            url=url,
            selector=selector,
            use_persistent=use_persistent,
            api_name="/click",
        )
        return str(outcome)
    except Exception as e:
        return f"Error during click_element: {str(e)}"
50
+
51
@tool
def fill_field(url: str, selector: str, text: str, use_persistent: bool = False) -> str:
    """Fill a text field or form element identified by the CSS selector with the provided text."""
    session = get_browser_client()
    if session is None:
        return "Error: Browser client unavailable."
    try:
        # Forward the form-fill request to the remote browser Space.
        outcome = session.predict(
            url=url,
            selector=selector,
            text=text,
            use_persistent=use_persistent,
            api_name="/fill",
        )
        return str(outcome)
    except Exception as e:
        return f"Error during fill_field: {str(e)}"
67
+
68
@tool
def execute_javascript(url: str, script: str, use_persistent: bool = False) -> str:
    """Execute custom JavaScript on the page and return the result."""
    runner = get_browser_client()
    if runner is None:
        return "Error: Browser client unavailable."
    try:
        # The script runs in the remote browser's page context.
        outcome = runner.predict(
            url=url,
            script=script,
            use_persistent=use_persistent,
            api_name="/execute_js",
        )
        return str(outcome)
    except Exception as e:
        return f"Error during execute_javascript: {str(e)}"
83
+
84
@tool
def get_cookies(url: str, use_persistent: bool = False) -> str:
    """Get all cookies for the current domain in JSON format."""
    api = get_browser_client()
    if api is None:
        return "Error: Browser client unavailable."
    try:
        # Cookie data is serialized to JSON by the remote Space.
        cookie_blob = api.predict(
            url=url,
            use_persistent=use_persistent,
            api_name="/get_cookies",
        )
        return str(cookie_blob)
    except Exception as e:
        return f"Error during get_cookies: {str(e)}"
98
+
99
@tool
def set_cookies(url: str, cookies_json: str, use_persistent: bool = False) -> str:
    """Set cookies on the page from a JSON string."""
    api = get_browser_client()
    if api is None:
        return "Error: Browser client unavailable."
    try:
        # The JSON payload is parsed and applied by the remote Space.
        outcome = api.predict(
            url=url,
            cookies_json=cookies_json,
            use_persistent=use_persistent,
            api_name="/set_cookies",
        )
        return str(outcome)
    except Exception as e:
        return f"Error during set_cookies: {str(e)}"
114
+
115
@tool
def scroll_page(url: str, direction: str = "bottom", pixels: float = 500, use_persistent: bool = False) -> str:
    """Scroll the page in a specified direction ('bottom', 'top', 'down', 'up')."""
    viewer = get_browser_client()
    if viewer is None:
        return "Error: Browser client unavailable."
    try:
        # 'pixels' only matters for the relative 'down'/'up' directions
        # (presumably — the Space's API defines the exact semantics).
        outcome = viewer.predict(
            url=url,
            direction=direction,
            pixels=pixels,
            use_persistent=use_persistent,
            api_name="/scroll_page",
        )
        return str(outcome)
    except Exception as e:
        return f"Error during scroll_page: {str(e)}"
131
+
132
@tool
def take_screenshot(url: str, full_page: bool = False, use_persistent: bool = False) -> str:
    """Take a screenshot of the current page and return the image data or path info."""
    gateway = get_browser_client()
    if gateway is None:
        return "Error: Browser client unavailable."
    try:
        shot = gateway.predict(
            url=url,
            full_page=full_page,
            use_persistent=use_persistent,
            api_name="/screenshot",
        )
        # json.dumps stays inside the try: a non-serializable result is
        # reported as a tool error rather than raised to the caller.
        return f"Screenshot captured: {json.dumps(shot)}"
    except Exception as e:
        return f"Error during take_screenshot: {str(e)}"
147
+
148
def get_all_browser_tools():
    """Returns a list of all browser automation tools."""
    # Keep registration in one place so new tools only need to be added here.
    registered = (
        browse_and_extract,
        click_element,
        fill_field,
        execute_javascript,
        get_cookies,
        set_cookies,
        scroll_page,
        take_screenshot,
    )
    return list(registered)
src/web_extractor.py CHANGED
@@ -22,6 +22,7 @@ from .prompts import get_prompt_for_model
22
  from .scrapers.tor.tor_scraper import TorScraper
23
  from .scrapers.tor.tor_config import TorConfig
24
  from .scrapers.tor.exceptions import TorException
 
25
 
26
  logger = logging.getLogger(__name__)
27
 
@@ -109,6 +110,7 @@ class WebExtractor:
109
  self.content_hash: str | None = None
110
  self.tor_config = tor_config or TorConfig()
111
  self.tor_scraper = TorScraper(self.tor_config)
 
112
 
113
  @staticmethod
114
  def num_tokens_from_string(string: str) -> int:
@@ -137,7 +139,12 @@ class WebExtractor:
137
  return history_text.strip() if history_text else "No previous conversation."
138
 
139
  async def _call_model(self, query: str, conversation_history: list[dict] | None = None) -> str:
140
- """Call the model to extract information from preprocessed content."""
 
 
 
 
 
141
  prompt_template = get_prompt_for_model(self.model_name)
142
 
143
  # Format conversation history
@@ -159,6 +166,58 @@ class WebExtractor:
159
  })
160
  return response.content
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  @staticmethod
163
  def _is_page_spec(value: str) -> bool:
164
  """Check if a string is a valid page specification (e.g., '1-5', '1,3,5', '2')."""
 
22
  from .scrapers.tor.tor_scraper import TorScraper
23
  from .scrapers.tor.tor_config import TorConfig
24
  from .scrapers.tor.exceptions import TorException
25
+ from .utils.browser_tools import get_all_browser_tools
26
 
27
  logger = logging.getLogger(__name__)
28
 
 
110
  self.content_hash: str | None = None
111
  self.tor_config = tor_config or TorConfig()
112
  self.tor_scraper = TorScraper(self.tor_config)
113
+ self.tools = get_all_browser_tools()
114
 
115
  @staticmethod
116
  def num_tokens_from_string(string: str) -> int:
 
139
  return history_text.strip() if history_text else "No previous conversation."
140
 
141
  async def _call_model(self, query: str, conversation_history: list[dict] | None = None) -> str:
142
+ """Call the model to extract information from preprocessed content, with tool support if available."""
143
+
144
+ # Check if the model supports tool calling
145
+ if hasattr(self.model, "bind_tools") and not isinstance(self.model, OllamaModel):
146
+ return await self._call_model_with_tools(query, conversation_history)
147
+
148
  prompt_template = get_prompt_for_model(self.model_name)
149
 
150
  # Format conversation history
 
166
  })
167
  return response.content
168
 
169
+ async def _call_model_with_tools(self, query: str, conversation_history: list[dict] | None = None) -> str:
170
+ """Execute a tool-calling loop with the model."""
171
+ from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AIMessage
172
+
173
+ history_text = self._format_conversation_history(conversation_history)
174
+
175
+ system_prompt = f"""You are a master netrunner AI with the personality of Rebecca from Cyberpunk 2077.
176
+ You help users scrape and extract data. You have access to advanced browser automation tools.
177
+
178
+ Current webpage content (preprocessed):
179
+ {self.preprocessed_content}
180
+
181
+ Conversation history:
182
+ {history_text}
183
+
184
+ If you are blocked, see a captcha, or the content above is incomplete, use your tools to interact with the page, get cookies, or execute JavaScript.
185
+ Always try to return the final data in the format requested by the user.
186
+ """
187
+
188
+ messages = [
189
+ SystemMessage(content=system_prompt),
190
+ HumanMessage(content=query)
191
+ ]
192
+
193
+ model_with_tools = self.model.bind_tools(self.tools)
194
+
195
+ # Tool execution loop (max 5 iterations)
196
+ for _ in range(5):
197
+ response = await model_with_tools.ainvoke(messages)
198
+ messages.append(response)
199
+
200
+ if not response.tool_calls:
201
+ return response.content
202
+
203
+ for tool_call in response.tool_calls:
204
+ tool_name = tool_call["name"].lower()
205
+ tool_args = tool_call["args"]
206
+
207
+ # Find the tool
208
+ selected_tool = next((t for t in self.tools if t.name.lower() == tool_name), None)
209
+ if selected_tool:
210
+ try:
211
+ observation = selected_tool.invoke(tool_args)
212
+ except Exception as e:
213
+ observation = f"Error executing tool {tool_name}: {str(e)}"
214
+ else:
215
+ observation = f"Tool {tool_name} not found."
216
+
217
+ messages.append(ToolMessage(content=str(observation), tool_call_id=tool_call["id"]))
218
+
219
+ return messages[-1].content if hasattr(messages[-1], "content") else str(messages[-1])
220
+
221
  @staticmethod
222
  def _is_page_spec(value: str) -> bool:
223
  """Check if a string is a valid page specification (e.g., '1-5', '1,3,5', '2')."""
test_tools.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
from src.utils.browser_tools import get_all_browser_tools


def test_tools():
    """Smoke test: confirm every browser tool is registered and has a name."""
    tools = get_all_browser_tools()
    print(f"Number of tools initialized: {len(tools)}")
    for tool in tools:
        print(f"Tool name: {tool.name}")


if __name__ == "__main__":
    test_tools()