varun324242 committed on
Commit
0d3af20
·
verified ·
1 Parent(s): d073bea

Upload 58 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BrowsingAgent/.DS_Store +0 -0
  2. BrowsingAgent/BrowsingAgent.py +184 -0
  3. BrowsingAgent/__init__.py +1 -0
  4. BrowsingAgent/__pycache__/BrowsingAgent.cpython-311.pyc +0 -0
  5. BrowsingAgent/__pycache__/BrowsingAgent.cpython-313.pyc +0 -0
  6. BrowsingAgent/__pycache__/__init__.cpython-311.pyc +0 -0
  7. BrowsingAgent/__pycache__/__init__.cpython-313.pyc +0 -0
  8. BrowsingAgent/__pycache__/ma.cpython-311.pyc +0 -0
  9. BrowsingAgent/instructions.md +23 -0
  10. BrowsingAgent/ma.py +522 -0
  11. BrowsingAgent/requirements.txt +5 -0
  12. BrowsingAgent/tools/ClickElement.py +59 -0
  13. BrowsingAgent/tools/ExportFile.py +45 -0
  14. BrowsingAgent/tools/GoBack.py +22 -0
  15. BrowsingAgent/tools/ReadURL.py +44 -0
  16. BrowsingAgent/tools/Scroll.py +53 -0
  17. BrowsingAgent/tools/SearchAndScrape.py +41 -0
  18. BrowsingAgent/tools/SelectDropdown.py +58 -0
  19. BrowsingAgent/tools/SendKeys.py +73 -0
  20. BrowsingAgent/tools/SolveCaptcha.py +238 -0
  21. BrowsingAgent/tools/WebPageSummarizer.py +39 -0
  22. BrowsingAgent/tools/__init__.py +10 -0
  23. BrowsingAgent/tools/__pycache__/ClickElement.cpython-311.pyc +0 -0
  24. BrowsingAgent/tools/__pycache__/ClickElement.cpython-313.pyc +0 -0
  25. BrowsingAgent/tools/__pycache__/ExportFile.cpython-311.pyc +0 -0
  26. BrowsingAgent/tools/__pycache__/ExportFile.cpython-313.pyc +0 -0
  27. BrowsingAgent/tools/__pycache__/GoBack.cpython-311.pyc +0 -0
  28. BrowsingAgent/tools/__pycache__/GoBack.cpython-313.pyc +0 -0
  29. BrowsingAgent/tools/__pycache__/ReadURL.cpython-311.pyc +0 -0
  30. BrowsingAgent/tools/__pycache__/ReadURL.cpython-313.pyc +0 -0
  31. BrowsingAgent/tools/__pycache__/Scroll.cpython-311.pyc +0 -0
  32. BrowsingAgent/tools/__pycache__/Scroll.cpython-313.pyc +0 -0
  33. BrowsingAgent/tools/__pycache__/SearchAndScrape.cpython-311.pyc +0 -0
  34. BrowsingAgent/tools/__pycache__/SearchAndScrape.cpython-313.pyc +0 -0
  35. BrowsingAgent/tools/__pycache__/SelectDropdown.cpython-311.pyc +0 -0
  36. BrowsingAgent/tools/__pycache__/SelectDropdown.cpython-313.pyc +0 -0
  37. BrowsingAgent/tools/__pycache__/SendKeys.cpython-311.pyc +0 -0
  38. BrowsingAgent/tools/__pycache__/SendKeys.cpython-313.pyc +0 -0
  39. BrowsingAgent/tools/__pycache__/SolveCaptcha.cpython-311.pyc +0 -0
  40. BrowsingAgent/tools/__pycache__/SolveCaptcha.cpython-313.pyc +0 -0
  41. BrowsingAgent/tools/__pycache__/WebPageSummarizer.cpython-311.pyc +0 -0
  42. BrowsingAgent/tools/__pycache__/WebPageSummarizer.cpython-313.pyc +0 -0
  43. BrowsingAgent/tools/__pycache__/__init__.cpython-311.pyc +0 -0
  44. BrowsingAgent/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  45. BrowsingAgent/tools/util/__init__.py +3 -0
  46. BrowsingAgent/tools/util/__pycache__/__init__.cpython-311.pyc +0 -0
  47. BrowsingAgent/tools/util/__pycache__/__init__.cpython-313.pyc +0 -0
  48. BrowsingAgent/tools/util/__pycache__/get_b64_screenshot.cpython-311.pyc +0 -0
  49. BrowsingAgent/tools/util/__pycache__/get_b64_screenshot.cpython-313.pyc +0 -0
  50. BrowsingAgent/tools/util/__pycache__/highlights.cpython-311.pyc +0 -0
BrowsingAgent/.DS_Store ADDED
Binary file (6.15 kB). View file
 
BrowsingAgent/BrowsingAgent.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import logging
4
+ from agency_swarm.agents import Agent
5
+ from typing_extensions import override
6
+ import base64
7
+ from .tools.SearchAndScrape import SearchAndScrape
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.support.select import Select
10
+ from .tools.util import highlight_elements_with_labels, get_web_driver, set_web_driver
11
+ from agency_swarm.tools.oai import FileSearch
12
+
13
+
14
class BrowsingAgent(Agent):
    """Agent that performs web searches and navigates web pages via Selenium.

    User-facing bracketed commands (e.g. '[send screenshot]',
    '[highlight clickable elements]') are intercepted in
    ``response_validator`` and answered with an annotated screenshot of the
    current browsing session.
    """

    # Local file the latest screenshot is written to before being uploaded.
    SCREENSHOT_FILE_NAME = "screenshot.jpg"

    def __init__(self, selenium_config=None, **kwargs):
        """Create the agent; optionally install a custom Selenium config."""
        from .tools.util.selenium import set_selenium_config
        super().__init__(
            name="BrowsingAgent",
            description="This agent is designed to perform web searches and navigate web pages.",
            instructions="""
            I am a browsing agent that can:
            1. Perform Google searches
            2. Navigate web pages
            3. Take screenshots
            4. Highlight and interact with page elements

            Use my search capabilities to find information and my navigation tools to explore web pages.
            """,
            files_folder="./files",
            schemas_folder="./schemas",
            tools=[SearchAndScrape],
            tools_folder="./tools",
            temperature=0,
            max_prompt_tokens=16000,
            model="groq/llama-3.3-70b-versatile",
            **kwargs
        )
        if selenium_config is not None:
            set_selenium_config(selenium_config)

        # Last bracket-stripped reply, used to detect the model repeating itself.
        self.prev_message = ""

    @override
    def response_validator(self, message):
        """Intercept bracketed screenshot commands embedded in *message*.

        When a command is found, a screenshot (optionally with highlighted
        elements) is captured and raised inside a ValueError so the framework
        feeds it back to the model; otherwise *message* is returned unchanged.
        """
        from .tools.util.selenium import get_web_driver, set_web_driver
        from .tools.util import highlight_elements_with_labels, remove_highlight_and_labels
        from selenium.webdriver.support.select import Select

        # Filter out everything in square brackets so two replies differing
        # only by their commands still count as repeats.
        filtered_message = re.sub(r'\[.*?\]', '', message).strip()

        if filtered_message and self.prev_message == filtered_message:
            raise ValueError("Do not repeat yourself. If you are stuck, try a different approach or search in google for the page you are looking for directly.")

        self.prev_message = filtered_message

        if "[send screenshot]" in message.lower():
            wd = get_web_driver()
            remove_highlight_and_labels(wd)
            self.take_screenshot()
            response_text = "Here is the screenshot of the current web page:"

        elif '[highlight clickable elements]' in message.lower():
            wd = get_web_driver()
            selector = ('a, button, div[onclick], div[role="button"], div[tabindex], '
                        'span[onclick], span[role="button"], span[tabindex]')
            highlight_elements_with_labels(wd, selector)
            self._shared_state.set("elements_highlighted", selector)

            self.take_screenshot()

            element_texts_json = self._highlighted_element_texts(wd)
            element_texts_formatted = ", ".join([f"{k}: {v}" for k, v in element_texts_json.items()])

            response_text = ("Here is the screenshot of the current web page with highlighted clickable elements. \n\n"
                             "Texts of the elements are: " + element_texts_formatted + ".\n\n"
                             "Elements without text are not shown, but are available on screenshot. \n"
                             "Please make sure to analyze the screenshot to find the clickable element you need to click on.")

        elif '[highlight text fields]' in message.lower():
            wd = get_web_driver()
            highlight_elements_with_labels(wd, 'input, textarea')
            self._shared_state.set("elements_highlighted", "input, textarea")

            self.take_screenshot()

            # CONSISTENCY FIX: empty element texts are now filtered out here
            # too, matching the clickable-elements branch above.
            element_texts_json = self._highlighted_element_texts(wd)
            element_texts_formatted = ", ".join([f"{k}: {v}" for k, v in element_texts_json.items()])

            response_text = ("Here is the screenshot of the current web page with highlighted text fields: \n"
                             "Texts of the elements are: " + element_texts_formatted + ".\n"
                             "Please make sure to analyze the screenshot to find the text field you need to fill.")

        elif '[highlight dropdowns]' in message.lower():
            wd = get_web_driver()
            highlight_elements_with_labels(wd, 'select')
            self._shared_state.set("elements_highlighted", "select")

            self.take_screenshot()

            all_elements = self._find_highlighted_elements(wd)

            all_selector_values = {}
            # BUG FIX: the original initialized `i = 0` and never incremented
            # it, so every dropdown overwrote key "1"; enumerate numbers each
            # dropdown correctly.
            for i, element in enumerate(all_elements, start=1):
                select = Select(element)
                selector_values = {}
                for j, option in enumerate(select.options):
                    selector_values[str(j)] = option.text
                    if j > 10:  # cap the number of options reported per dropdown
                        break
                all_selector_values[str(i)] = selector_values

            all_selector_values = {k: v for k, v in all_selector_values.items() if v}
            all_selector_values_formatted = ", ".join([f"{k}: {v}" for k, v in all_selector_values.items()])

            response_text = ("Here is the screenshot with highlighted dropdowns. \n"
                             "Selector values are: " + all_selector_values_formatted + ".\n"
                             "Please make sure to analyze the screenshot to find the dropdown you need to select.")

        else:
            return message

        set_web_driver(wd)
        content = self.create_response_content(response_text)
        raise ValueError(content)

    def _find_highlighted_elements(self, wd):
        """Return all elements currently carrying the highlight CSS class."""
        from selenium.webdriver.common.by import By
        return wd.find_elements(By.CSS_SELECTOR, '.highlighted-element')

    def _highlighted_element_texts(self, wd):
        """Map 1-based element number -> ASCII-only element text (empties dropped)."""
        texts = {}
        for i, element in enumerate(self._find_highlighted_elements(wd), start=1):
            cleaned = self.remove_unicode(element.text)
            if cleaned:
                texts[str(i)] = cleaned
        return texts

    def take_screenshot(self):
        """Capture the current page and write it to SCREENSHOT_FILE_NAME."""
        from .tools.util.selenium import get_web_driver
        from .tools.util import get_b64_screenshot
        wd = get_web_driver()
        screenshot = get_b64_screenshot(wd)
        screenshot_data = base64.b64decode(screenshot)
        with open(self.SCREENSHOT_FILE_NAME, "wb") as screenshot_file:
            screenshot_file.write(screenshot_data)

    def create_response_content(self, response_text):
        """Upload the last screenshot and pair it with *response_text*.

        Returns a message-content list with a text part and an image part.
        """
        with open(self.SCREENSHOT_FILE_NAME, "rb") as file:
            file_id = self.client.files.create(
                file=file,
                purpose="vision",
            ).id

        content = [
            {"type": "text", "text": response_text},
            {
                "type": "image_file",
                "image_file": {"file_id": file_id}
            }
        ]
        return content

    def remove_unicode(self, data):
        """Strip all non-ASCII characters from *data*."""
        return re.sub(r'[^\x00-\x7F]+', '', data)

    def run_search_and_scrape(self, query):
        """Run the SearchAndScrape tool and process the results."""
        tool = SearchAndScrape(query=query)
        result = tool.run()
        logging.info(f"Search and Scrape result: {result}")
        return result
BrowsingAgent/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .BrowsingAgent import BrowsingAgent
BrowsingAgent/__pycache__/BrowsingAgent.cpython-311.pyc ADDED
Binary file (11.2 kB). View file
 
BrowsingAgent/__pycache__/BrowsingAgent.cpython-313.pyc ADDED
Binary file (8.63 kB). View file
 
BrowsingAgent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (302 Bytes). View file
 
BrowsingAgent/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (245 Bytes). View file
 
BrowsingAgent/__pycache__/ma.cpython-311.pyc ADDED
Binary file (23.1 kB). View file
 
BrowsingAgent/instructions.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Browsing Agent Instructions
2
+
3
+ As an advanced browsing agent, you are equipped with specialized tools to navigate and search the web effectively. Your primary objective is to fulfill the user's requests by efficiently utilizing these tools.
4
+
5
+ ### Primary Instructions:
6
+
7
+ 1. **Search and Scrape**: Use the SearchAndScrape tool to perform Google searches and scrape content using Firecrawl.
8
+ 2. **Navigating to New Pages**: Always use the `ClickElement` tool to open links when navigating to a new web page from the current source.
9
+ 3. **Single Page Interaction**: You can only open and interact with one web page at a time.
10
+ 4. **Requesting Screenshots**: Before using tools that interact with the web page, ask the user to send you the appropriate screenshot.
11
+
12
+ ### Commands to Request Screenshots:
13
+
14
+ - **'[send screenshot]'**: Sends the current browsing window as an image.
15
+ - **'[highlight clickable elements]'**: Highlights all clickable elements on the current web page.
16
+ - **'[highlight text fields]'**: Highlights all text fields on the current web page.
17
+ - **'[highlight dropdowns]'**: Highlights all dropdowns on the current web page.
18
+
19
+ ### Important Reminders:
20
+
21
+ - Only open and interact with one web page at a time.
22
+ - Use the SearchAndScrape tool for efficient web searching and content extraction.
23
+ - Complete your interactions with the current web page before proceeding to a different source.
BrowsingAgent/ma.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from datetime import datetime
3
+ from firecrawl import FirecrawlApp
4
+ import os
5
+ import time
6
+ import google.generativeai as genai
7
+ import requests # Import requests for making API calls
8
+ from googlesearch import search # Add this import at the top
9
+ import json
10
+
11
# Initialize logging
# NOTE(review): DEBUG is very verbose for production use — consider INFO.
logging.basicConfig(level=logging.DEBUG)

# Initialize Firecrawl
# SECURITY: the API key should come from the environment; the hard-coded
# default is kept only for backward compatibility and should be rotated.
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", "fc-5fadfeae30314d4ea8a3d9afaa75c493")
firecrawl_app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
logging.info("Firecrawl initialized")

# Initialize Gemini
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-1.5-flash')
    logging.info("Gemini initialized")
else:
    # BUG FIX: define `model` unconditionally so later references fail with a
    # clear error on None rather than a NameError when no key is configured.
    model = None
    logging.warning("No Gemini API key found")
28
def perform_search(query, use_custom_api=True):
    """
    Perform a web search for *query* and return a list of result URLs.

    Tries the Google Custom Search JSON API first (when *use_custom_api* is
    True), then falls back to the ``googlesearch`` package. Returns an empty
    list when both mechanisms fail.
    """
    try:
        if use_custom_api:
            # Try Custom Search API first.
            # SECURITY: credentials should come from the environment; the
            # hard-coded defaults are kept only for backward compatibility
            # and should be rotated.
            api_key = os.getenv("GOOGLE_CSE_API_KEY", "AIzaSyAxeLlJ6vZxOl-TblUJg_dInBS3vNxaFVY")
            search_engine_id = os.getenv("GOOGLE_CSE_ID", "37793b12975da4e35")

            # BUG FIX: pass the query via `params` so requests URL-encodes
            # spaces and special characters; the original f-string URL sent
            # the raw query, which breaks for multi-word queries.
            response = requests.get(
                "https://www.googleapis.com/customsearch/v1",
                params={"key": api_key, "cx": search_engine_id, "q": query, "num": 2},
            )
            if response.status_code == 200:
                search_results = response.json().get('items', [])
                if search_results:
                    return [item['link'] for item in search_results]
            logging.warning("Custom Search API failed, falling back to googlesearch")

        # Fallback to googlesearch package
        logging.info("Using googlesearch package")
        return list(search(query, num_results=2, lang="en"))

    except Exception as e:
        logging.error(f"Search error: {str(e)}")
        return []
55
def scrape_with_retry(url, max_retries=3, timeout=15):
    """Scrape *url* via Firecrawl, retrying on timeouts and rate limits.

    Known-slow domains are skipped outright. Returns the markdown body when
    it looks substantial (> 200 chars after stripping), otherwise None.
    """
    # Domains (and file types) that routinely time out — skip immediately.
    blocked_fragments = (
        'sparktoro.com',
        'j-jdis.com',
        'linkedin.com',
        'facebook.com',
        'twitter.com',
        'reddit.com',
        '.pdf',
    )

    lowered = url.lower()
    if any(fragment in lowered for fragment in blocked_fragments):
        logging.info(f"Skipping known problematic URL: {url}")
        return None

    for attempt in range(max_retries):
        try:
            # Grow the timeout on each retry instead of starting large.
            current_timeout = timeout * (attempt + 1)
            logging.info(f"Attempting to scrape {url} (timeout: {current_timeout}s)")

            response = firecrawl_app.scrape_url(
                url=url,
                params={
                    'formats': ['markdown'],
                    'timeout': current_timeout,
                    'wait': True,  # Enable rate limiting
                    'max_retries': 2  # Internal retries
                }
            )

            if response and response.get('markdown'):
                content = response.get('markdown')
                if len(content.strip()) > 200:  # Verify content quality
                    logging.info(f"Successfully scraped {url}")
                    return content
                logging.warning(f"Content too short from {url}")
                return None

        except Exception as e:
            error_msg = str(e).lower()
            wait_time = (attempt + 1) * 5  # Reduced wait times

            if "timeout" in error_msg or "408" in error_msg:
                if attempt < max_retries - 1:
                    logging.warning(f"Timeout error for {url}, attempt {attempt + 1}")
                    logging.info(f"Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    continue
                logging.error(f"Final timeout for {url} after {max_retries} attempts")
                break

            if "429" in error_msg:  # Rate limit
                logging.info(f"Rate limit hit, waiting {wait_time}s...")
                time.sleep(wait_time)
                continue

            logging.error(f"Error scraping {url}: {error_msg}")
            break

        # Reached only when the response was empty / had no markdown.
        time.sleep(1)  # Reduced basic delay

    return None
127
def get_trends_data(query):
    """Get market trends data with improved error handling.

    Runs a battery of trend-related searches for *query*, scrapes the top
    results, processes them into a structured payload, and archives the
    analysis under ``gemini_outputs/``. Falls back to a placeholder payload
    whenever no usable content can be gathered or processing fails.
    """
    try:
        if not query:
            logging.error("No query provided")
            return generate_fallback_response("Unknown Business")

        logging.info(f"\n{'='*50}\nGathering trends data for: {query}\n{'='*50}")

        # Define search queries
        search_queries = [
            # Market Overview
            f"{query} market size revenue statistics analysis",
            # Industry Trends
            f"{query} industry trends growth forecast analysis",
            # Competition Analysis
            f"{query} market share competitive landscape analysis",
            # Technology & Innovation
            f"{query} technology innovation disruption analysis",
            # Future Outlook
            f"{query} market future outlook predictions analysis"
        ]

        scraped_content = []
        use_custom_api = True
        successful_scrapes = 0
        min_required_content = 2
        max_attempts_per_url = 2

        for search_query in search_queries:
            # Stop early once enough articles have been collected.
            if successful_scrapes >= min_required_content:
                break

            try:
                logging.info(f"\nSearching for: {search_query}")
                search_results = perform_search(search_query, use_custom_api)

                if not search_results and use_custom_api:
                    # Custom API produced nothing — stop using it for the
                    # remaining queries and retry with the fallback engine.
                    use_custom_api = False
                    search_results = perform_search(search_query, use_custom_api=False)

                if search_results:
                    attempts = 0
                    for url in search_results:
                        if successful_scrapes >= min_required_content or attempts >= max_attempts_per_url:
                            break

                        content = scrape_with_retry(url, timeout=15)  # Reduced initial timeout
                        if content:
                            scraped_content.append({
                                'url': url,
                                'domain': extract_domain(url),
                                'section': 'Market Trends',
                                'date': datetime.now().strftime("%Y-%m-%d"),
                                'content': content[:2000]
                            })
                            successful_scrapes += 1
                        attempts += 1

                time.sleep(1)  # Reduced delay between queries

            except Exception as e:
                logging.error(f"Error in search for query '{search_query}': {str(e)}")
                continue

        if not scraped_content:
            logging.warning("No content scraped, returning fallback response")
            return generate_fallback_response(query)

        try:
            result = process_scraped_content(scraped_content, query)

            # Save analysis to file
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            # BUG FIX: the original raised FileNotFoundError when the output
            # directory did not exist yet.
            os.makedirs('gemini_outputs', exist_ok=True)
            output_file = os.path.join('gemini_outputs', f'market_trends_{timestamp}.txt')

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"Market Trends Analysis for: {query}\n")
                f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write("="*50 + "\n\n")
                f.write(json.dumps(result, indent=2))
                f.write("\n\nData Sources:\n")
                for source in scraped_content:
                    f.write(f"- {source['domain']} ({source['date']})\n")

            return result

        except Exception as e:
            logging.error(f"Error processing content: {str(e)}")
            return generate_fallback_response(query)

    except Exception as e:
        logging.error(f"Error during market trends analysis: {str(e)}")
        return generate_fallback_response(query)
226
def process_scraped_content(scraped_content, query):
    """Turn scraped articles into the structured trends payload.

    Delegates text generation to Gemini via ``generate_analysis``, then
    slices the free-text analysis into bullet-point sections. Returns the
    fallback payload on any failure.
    """
    try:
        # Generate analysis using the scraped content
        analysis = generate_analysis(scraped_content, query)

        market = {
            "market_size_growth": {
                "total_market_value": extract_bullet_points(analysis, "Market Size"),
                "market_segments": extract_bullet_points(analysis, "Market Segments"),
                "regional_distribution": extract_bullet_points(analysis, "Regional Distribution"),
            },
            "competitive_landscape": {
                "market_leaders": extract_bullet_points(analysis, "Market Leaders"),
                "market_differentiators": extract_bullet_points(analysis, "Market Differentiators"),
                "industry_dynamics": extract_bullet_points(analysis, "Industry Dynamics"),
            },
            "consumer_analysis": {
                "segments": extract_bullet_points(analysis, "Consumer Segments"),
                "behavior_patterns": extract_bullet_points(analysis, "Behavior Patterns"),
                "pain_points": extract_bullet_points(analysis, "Pain Points"),
            },
            "metrics": extract_metrics(scraped_content),
            "sources": [
                {
                    'url': item['url'],
                    'domain': item['domain'],
                    'section': item['section'],
                    'date': item['date'],
                }
                for item in scraped_content
            ],
        }
        return market

    except Exception as e:
        logging.error(f"Error processing scraped content: {str(e)}")
        return generate_fallback_response(query)
262
def extract_domain(url):
    """Return the host of *url* without a leading ``www.`` prefix.

    Falls back to returning *url* unchanged when parsing fails.
    """
    try:
        from urllib.parse import urlparse
        domain = urlparse(url).netloc
        # BUG FIX: only strip a leading "www." — the original
        # `replace('www.', '')` removed the substring anywhere in the host.
        return domain[4:] if domain.startswith('www.') else domain
    except Exception:  # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        return url
271
def generate_fallback_response(query):
    """Generate fallback response when analysis fails.

    Every field is a placeholder marked '(Inferred)' so downstream
    consumers can still render a complete structure without real data.
    """
    size_and_growth = {
        "total_market_value": [f"Market size analysis for {query} pending (Inferred)"],
        "market_segments": ["Market segmentation analysis needed (Inferred)"],
        "regional_distribution": ["Regional analysis to be conducted (Inferred)"],
    }
    competition = {
        "market_leaders": ["Market leader analysis pending (Inferred)"],
        "market_differentiators": ["Differentiator analysis needed (Inferred)"],
        "industry_dynamics": ["Industry dynamics to be evaluated (Inferred)"],
    }
    consumers = {
        "segments": ["Consumer segmentation pending (Inferred)"],
        "behavior_patterns": ["Behavior analysis needed (Inferred)"],
        "pain_points": ["Pain point identification required (Inferred)"],
    }
    return {
        "market_size_growth": size_and_growth,
        "competitive_landscape": competition,
        "consumer_analysis": consumers,
        "metrics": {},
        "sources": [],
    }
293
def process_analysis(analysis, scraped_content):
    """Process and structure the analysis for frontend consumption."""
    # Section -> subsections filled from the free-text analysis; each
    # subsection label is derived from its key (underscores -> Title Case).
    section_layout = {
        "market_size_growth": (
            "total_market_value", "market_segments",
            "regional_distribution", "growth_drivers",
        ),
        "competitive_landscape": (
            "market_leaders", "market_differentiators",
            "industry_dynamics", "entry_barriers",
        ),
        "consumer_analysis": (
            "segments", "behavior_patterns", "pain_points", "decision_factors",
        ),
        "technology_innovation": (
            "current_trends", "emerging_tech", "digital_impact",
            "innovation_opportunities",
        ),
        "regulatory_environment": (
            "key_regulations", "compliance_requirements",
            "environmental_impact", "sustainability",
        ),
        "future_outlook": (
            "growth_forecast", "opportunities", "challenges",
            "evolution_scenarios",
        ),
        "strategic_recommendations": (
            "entry_strategies", "product_development", "tech_investments",
            "risk_mitigation",
        ),
    }

    result = {
        section: {
            subsection: extract_bullet_points(analysis, subsection.replace('_', ' ').title())
            for subsection in subsections
        }
        for section, subsections in section_layout.items()
    }
    result["metrics"] = extract_metrics(scraped_content)
    result["sources"] = []
    return result
350
def extract_metrics(scraped_content):
    """Extract and structure metrics from scraped content.

    Collects per-item 'market_share', 'growth_rates' and 'money' series into
    float-valued dicts keyed 'Company N' / 'Period N' / 'Entity N'; values
    that do not parse as floats are silently skipped.
    """
    metrics = {
        "market_share": {},
        "growth_rates": {},
        "revenue": {}
    }

    def _collect(target, raw_values, label):
        # Copy parseable values into `target`, numbering from 1.
        for idx, raw in enumerate(raw_values):
            try:
                target[f'{label} {idx + 1}'] = float(raw)
            except ValueError:
                continue

    for item in scraped_content:
        if 'metrics' in item:
            _collect(metrics['market_share'], item['metrics'].get('market_share', []), 'Company')
            _collect(metrics['growth_rates'], item['metrics'].get('growth_rates', []), 'Period')
            _collect(metrics['revenue'], item['metrics'].get('money', []), 'Entity')

    return metrics
386
def extract_bullet_points(text, section_name):
    """Extract bullet points from a specific section.

    Scans *text* line by line: collection starts after a line containing
    *section_name* and stops at the next recognized section header. Returns
    a placeholder list when nothing is found or on error.
    """
    known_headers = (
        "Market Size", "Market Segments", "Regional Distribution",
        "Market Leaders", "Market Differentiators", "Industry Dynamics",
        "Consumer Segments", "Behavior Patterns", "Pain Points",
        "Current Trends", "Emerging Technologies", "Growth Forecast",
        "Opportunities", "Challenges",
    )

    try:
        collected = []
        capturing = False

        for line in text.split('\n'):
            if section_name in line:
                capturing = True
            elif any(header in line for header in known_headers):
                # A different section header ends the capture window.
                capturing = False
            elif capturing and line.strip().startswith('•'):
                point = line.strip('• ').strip()
                # Skip empty bullets and sub-headers ending in ':'.
                if point and not point.endswith(':'):
                    collected.append(point)

        if collected:
            return collected
        return [f"Analysis for {section_name} pending (Inferred)"]

    except Exception as e:
        logging.error(f"Error extracting bullet points for {section_name}: {str(e)}")
        return [f"Error extracting {section_name} data (Inferred)"]
415
def generate_analysis(scraped_content, query):
    """Generate market trends analysis using Gemini.

    Joins the scraped article bodies, prompts Gemini for a sectioned
    analysis, archives the raw exchange under ``gemini_outputs/`` and
    returns the analysis text. Re-raises on any failure so the caller can
    fall back.

    NOTE(review): relies on the module-level ``model``, which is only
    configured when GOOGLE_API_KEY is set — confirm callers tolerate the
    resulting exception when it is not.
    """
    try:
        # Prepare content for analysis
        content_text = "\n\n".join([item['content'] for item in scraped_content])

        # Create the analysis prompt
        analysis_prompt = f"""
        Task: Analyze the provided content to create a detailed market trends analysis for {query}.

        Content to analyze:
        {content_text}

        Please provide a structured analysis covering these exact sections:

        Market Size & Growth:
        Market Size:
        • [Provide market size estimates with specific numbers where available]
        • [Include year-over-year growth rates]

        Market Segments:
        • [Identify key market segments]
        • [Provide segment-wise breakdown]

        Regional Distribution:
        • [Analyze geographical distribution]
        • [Identify key markets and growth regions]

        Competitive Landscape:
        Market Leaders:
        • [List top companies and their market positions]
        • [Include market share data where available]

        Market Differentiators:
        • [Identify key competitive advantages]
        • [Analyze unique selling propositions]

        Industry Dynamics:
        • [Analyze industry trends and changes]
        • [Identify market drivers and challenges]

        Consumer Analysis:
        Consumer Segments:
        • [Identify key customer segments]
        • [Analyze segment characteristics]

        Behavior Patterns:
        • [Analyze purchasing patterns]
        • [Identify decision factors]

        Pain Points:
        • [List key customer challenges]
        • [Identify unmet needs]

        Technology & Innovation:
        Current Trends:
        • [Identify current technology trends]
        • [Analyze adoption rates]

        Emerging Technologies:
        • [List emerging technologies]
        • [Assess potential impact]

        Future Outlook:
        Growth Forecast:
        • [Provide growth projections]
        • [Identify growth drivers]

        Opportunities:
        • [List market opportunities]
        • [Identify potential areas for expansion]

        Challenges:
        • [Identify market challenges]
        • [List potential risks]

        Format each point with specific data where available.
        Mark inferences with (Inferred).
        Prioritize insights based on confidence and impact.
        """

        # Generate analysis using Gemini
        response = model.generate_content(analysis_prompt)
        if not response or not response.text:
            raise Exception("No response from Gemini")

        analysis = response.text

        # Save raw analysis to file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # BUG FIX: ensure the output directory exists before writing; the
        # original raised FileNotFoundError on a fresh checkout.
        os.makedirs('gemini_outputs', exist_ok=True)
        raw_output_file = os.path.join('gemini_outputs', f'market_trends_raw_{timestamp}.txt')

        with open(raw_output_file, 'w', encoding='utf-8') as f:
            f.write(f"Raw Market Trends Analysis for: {query}\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write("="*50 + "\n\n")
            f.write("Input Content:\n")
            f.write("-"*30 + "\n")
            f.write(content_text[:1000] + "...\n\n")
            f.write("Generated Analysis:\n")
            f.write("-"*30 + "\n")
            f.write(analysis)

        return analysis

    except Exception as e:
        logging.error(f"Error generating analysis: {str(e)}")
        raise
BrowsingAgent/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ selenium
2
+ webdriver-manager
3
+ selenium_stealth
4
+ googlesearch-python
5
+ firecrawl
BrowsingAgent/tools/ClickElement.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time

from pydantic import Field
from selenium.webdriver.common.by import By

from agency_swarm.tools import BaseTool
from .util import get_web_driver, set_web_driver
from .util.highlights import remove_highlight_and_labels


class ClickElement(BaseTool):
    """
    This tool clicks on an element on the current web page based on its number.

    Before using this tool make sure to highlight clickable elements on the page by outputting '[highlight clickable elements]' message.
    """
    element_number: int = Field(
        ...,
        description="The number of the element to click on. The element numbers are displayed on the page after highlighting elements.",
    )

    def run(self):
        driver = get_web_driver()

        # Clicking by number only makes sense after the highlight pass
        # has numbered the clickable elements on the page.
        if 'button' not in self._shared_state.get("elements_highlighted", ""):
            raise ValueError("Please highlight clickable elements on the page first by outputting '[highlight clickable elements]' message. You must output just the message without calling the tool first, so the user can respond with the screenshot.")

        highlighted = driver.find_elements(By.CSS_SELECTOR, '.highlighted-element')

        try:
            # On-screen numbering starts at 1; the element list is 0-based.
            target = highlighted[self.element_number - 1]
            label = (target.text or "").strip()

            try:
                target.click()
            except Exception as click_err:
                # Fall back to a JavaScript click when another element
                # overlays the target and intercepts the native click.
                if "element click intercepted" in str(click_err).lower():
                    driver.execute_script("arguments[0].click();", target)
                else:
                    raise click_err

            # Give the page a moment to react to the click / navigation.
            time.sleep(3)

            result = f"Clicked on element {self.element_number}. Text on clicked element: '{label}'. Current URL is {driver.current_url} To further analyze the page, output '[send screenshot]' command."
        except IndexError:
            result = "Element number is invalid. Please try again with a valid element number."
        except Exception as e:
            result = str(e)

        # Strip highlight overlays and restore the zoom used for screenshots.
        driver = remove_highlight_and_labels(driver)
        driver.execute_script("document.body.style.zoom='1.5'")

        set_web_driver(driver)

        # Highlights are stale after the click; force a fresh highlight pass.
        self._shared_state.set("elements_highlighted", "")

        return result
BrowsingAgent/tools/ExportFile.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import os

from agency_swarm.tools import BaseTool
from .util import get_web_driver


class ExportFile(BaseTool):
    """This tool converts the current full web page into a file and returns its file_id. You can then send this file id back to the user for further processing."""

    def run(self):
        """Render the current page to PDF via Chrome DevTools and upload it.

        Returns a confirmation message containing the OpenAI file id; the id
        is also stored in shared state under "file_id".
        """
        wd = get_web_driver()
        from agency_swarm import get_openai_client
        client = get_openai_client()

        # Parameters for Chrome's "Page.printToPDF" DevTools command.
        params = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }

        # The command returns the rendered PDF as a base64-encoded string.
        result = wd.execute_cdp_cmd('Page.printToPDF', params)
        pdf_bytes = base64.b64decode(result['data'])

        # Persist the PDF locally before uploading.
        with open("exported_file.pdf", "wb") as f:
            f.write(pdf_bytes)

        # Use a context manager so the read handle is closed after the upload
        # (the original left the file object open for the process lifetime).
        with open("exported_file.pdf", "rb") as f:
            file_id = client.files.create(file=f, purpose="assistants").id

        self._shared_state.set("file_id", file_id)

        return "Success. File exported with id: `" + file_id + "` You can now send this file id back to the user."


if __name__ == "__main__":
    wd = get_web_driver()
    wd.get("https://www.google.com")
    tool = ExportFile()
    tool.run()
BrowsingAgent/tools/GoBack.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time

from agency_swarm.tools import BaseTool

from .util.selenium import get_web_driver, set_web_driver


class GoBack(BaseTool):
    """
    This tool allows you to go back 1 page in the browser history. Use it in case of a mistake or if a page shows you unexpected content.
    """

    def run(self):
        driver = get_web_driver()

        # Navigate one step back in the browser history.
        driver.back()

        # Allow the previous page time to reload before reporting the URL.
        time.sleep(3)

        set_web_driver(driver)

        return "Success. Went back 1 page. Current URL is: " + driver.current_url
BrowsingAgent/tools/ReadURL.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time

from pydantic import Field

from agency_swarm.tools import BaseTool
from .util.selenium import get_web_driver, set_web_driver


class ReadURL(BaseTool):
    """
    This tool reads a single URL and opens it in your current browser window. For each new source, either navigate directly to a URL that you believe contains the answer to the user's question or perform a Google search (e.g., 'https://google.com/search?q=search') if necessary.

    If you are unsure of the direct URL, do not guess. Instead, use the ClickElement tool to click on links that might contain the desired information on the current web page.

    Note: This tool only supports opening one URL at a time. The previous URL will be closed when you open a new one.
    """
    chain_of_thought: str = Field(
        ..., description="Think step-by-step about where you need to navigate next to find the necessary information.",
        exclude=True
    )
    url: str = Field(
        ..., description="URL of the webpage.", examples=["https://google.com/search?q=search"]
    )

    class ToolConfig:
        one_call_at_a_time: bool = True

    def run(self):
        driver = get_web_driver()

        # Navigate the shared browser window to the requested address.
        driver.get(self.url)

        # Brief pause so the page can start rendering before control returns.
        time.sleep(2)

        set_web_driver(driver)

        # Any previously highlighted elements are stale on the new page.
        self._shared_state.set("elements_highlighted", "")

        return "Current URL is: " + driver.current_url + "\n" + "Please output '[send screenshot]' next to analyze the current web page or '[highlight clickable elements]' for further navigation."


if __name__ == "__main__":
    tool = ReadURL(url="https://google.com")
    print(tool.run())
BrowsingAgent/tools/Scroll.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Literal

from pydantic import Field

from agency_swarm.tools import BaseTool
from .util.selenium import get_web_driver, set_web_driver


class Scroll(BaseTool):
    """
    This tool allows you to scroll the current web page up or down by 1 screen height.
    """
    direction: Literal["up", "down"] = Field(
        ..., description="Direction to scroll."
    )

    def run(self):
        driver = get_web_driver()

        window_height = driver.get_window_size()['height']

        # CSS zoom scales the effective viewport, so the scroll distance
        # must be divided by the current zoom factor. The value may be a
        # percentage string ("150%") or a plain number string ("1.5").
        zoom_raw = driver.execute_script("return document.body.style.zoom || '1';")
        if '%' in zoom_raw:
            zoom = float(zoom_raw.strip('%')) / 100
        else:
            zoom = float(zoom_raw)
        step = window_height / zoom

        offset = driver.execute_script("return window.pageYOffset;")
        page_height = driver.execute_script("return document.body.scrollHeight;")

        result = ""

        if self.direction == "up":
            if offset == 0:
                # Already at the top; nothing to scroll.
                result = "Reached the top of the page. Cannot scroll up any further.\n"
            else:
                driver.execute_script(f"window.scrollBy(0, -{step});")
                result = "Scrolled up by 1 screen height. Make sure to output '[send screenshot]' command to analyze the page after scrolling."
        elif self.direction == "down":
            if offset + step >= page_height:
                # One more screen would pass the end of the document.
                result = "Reached the bottom of the page. Cannot scroll down any further.\n"
            else:
                driver.execute_script(f"window.scrollBy(0, {step});")
                result = "Scrolled down by 1 screen height. Make sure to output '[send screenshot]' command to analyze the page after scrolling."

        set_web_driver(driver)

        return result
BrowsingAgent/tools/SearchAndScrape.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from agency_swarm.tools import BaseTool
from pydantic import Field
import logging
import os
from googlesearch import search
import json

class SearchAndScrape(BaseTool):
    """
    A tool to perform Google searches and return results.
    """

    query: str = Field(
        ...,
        description="The search query to look up",
        examples=["best restaurants in New York", "how to learn python"]
    )

    def run(self):
        """
        Performs a Google search and returns a JSON string with the results.

        Returns:
            str: JSON object with "success" plus either "message"/"results"
            (a list of result URLs) or "error" on failure.
        """
        try:
            # googlesearch-python (pinned in requirements.txt) limits results
            # via `num_results`; the `stop` keyword belongs to the older
            # `google` package and raises TypeError with this library.
            search_results = search(self.query, num_results=5, lang="en")

            # Materialize the generator so it can be JSON-serialized.
            results = list(search_results)

            return json.dumps({
                "success": True,
                "message": f"Found {len(results)} results for query: {self.query}",
                "results": results
            })

        except Exception as e:
            logging.error(f"Search error: {str(e)}")
            return json.dumps({
                "success": False,
                "error": str(e)
            })
BrowsingAgent/tools/SelectDropdown.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Dict
from pydantic import Field, model_validator
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select

from agency_swarm.tools import BaseTool
from .util import get_web_driver, set_web_driver
from .util.highlights import remove_highlight_and_labels


class SelectDropdown(BaseTool):
    """
    This tool selects an option in a dropdown on the current web page based on the description of that element and which option to select.

    Before using this tool make sure to highlight dropdown elements on the page by outputting '[highlight dropdowns]' message.
    """

    key_value_pairs: Dict[str, str] = Field(...,
        description="A dictionary where the key is the sequence number of the dropdown element and the value is the index of the option to select.",
        # Example values are strings to match the declared Dict[str, str]
        # type (the original examples used ints, contradicting the schema);
        # both keys and values are cast to int before use.
        examples=[{"1": "0", "2": "1"}, {"3": "2"}]
    )

    @model_validator(mode='before')
    @classmethod
    def check_key_value_pairs(cls, data):
        """Reject calls made without any dropdown selections."""
        if not data.get('key_value_pairs'):
            raise ValueError(
                "key_value_pairs is required. Example format: "
                "key_value_pairs={'1': '0', '2': '1'}"
            )
        return data

    def run(self):
        """Select the requested option index in each numbered dropdown.

        Returns a success message, or the stringified exception on failure.
        """
        wd = get_web_driver()

        # Selection by number only makes sense after the highlight pass
        # has numbered the <select> elements on the page.
        if 'select' not in self._shared_state.get("elements_highlighted", ""):
            raise ValueError("Please highlight dropdown elements on the page first by outputting '[highlight dropdowns]' message. You must output just the message without calling the tool first, so the user can respond with the screenshot.")

        all_elements = wd.find_elements(By.CSS_SELECTOR, '.highlighted-element')

        try:
            for key, value in self.key_value_pairs.items():
                # On-screen numbering starts at 1; the element list is 0-based.
                key = int(key)
                element = all_elements[key - 1]

                select = Select(element)

                # Select the option at the requested index (not always index
                # 0, as the original comment misleadingly claimed).
                select.select_by_index(int(value))

            result = "Success. Option is selected in the dropdown. To further analyze the page, output '[send screenshot]' command."
        except Exception as e:
            result = str(e)

        remove_highlight_and_labels(wd)

        set_web_driver(wd)

        return result
BrowsingAgent/tools/SendKeys.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
from typing import Dict

from pydantic import Field
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By

from agency_swarm.tools import BaseTool
from .util import get_web_driver, set_web_driver
from .util.highlights import remove_highlight_and_labels


from pydantic import model_validator

class SendKeys(BaseTool):
    """
    This tool sends keys into input fields on the current webpage based on the description of that element and what needs to be typed. It then clicks "Enter" on the last element to submit the form. You do not need to tell it to press "Enter"; it will do that automatically.

    Before using this tool make sure to highlight the input elements on the page by outputting '[highlight text fields]' message.
    """
    elements_and_texts: Dict[int, str] = Field(...,
        description="A dictionary where the key is the element number and the value is the text to be typed.",
        examples=[
            {52: "johndoe@gmail.com", 53: "password123"},
            {3: "John Doe", 4: "123 Main St"},
        ]
    )

    @model_validator(mode='before')
    @classmethod
    def check_elements_and_texts(cls, data):
        # Reject calls made without any element/text pairs so the caller
        # receives a corrective example instead of an opaque runtime error.
        if not data.get('elements_and_texts'):
            raise ValueError(
                "elements_and_texts is required. Example format: "
                "elements_and_texts={1: 'John Doe', 2: '123 Main St'}"
            )
        return data

    def run(self):
        """Type the given text into each numbered input, pressing Enter on the last one.

        Returns a status message; on any unexpected error the stringified
        exception is returned instead. Highlights are removed afterwards.
        """
        wd = get_web_driver()
        # Typing by number only makes sense after the highlight pass has
        # numbered the input elements on the page.
        if 'input' not in self._shared_state.get("elements_highlighted", ""):
            raise ValueError("Please highlight input elements on the page first by outputting '[highlight text fields]' message. You must output just the message without calling the tool first, so the user can respond with the screenshot.")

        all_elements = wd.find_elements(By.CSS_SELECTOR, '.highlighted-element')

        # i tracks the position within the dict so Enter is sent only on the
        # last field (dicts preserve insertion order).
        i = 0
        try:
            for key, value in self.elements_and_texts.items():
                # On-screen numbering starts at 1; the element list is 0-based.
                key = int(key)
                element = all_elements[key - 1]

                try:
                    # Best-effort clearing of any pre-filled text before typing;
                    # some inputs reject this sequence, so failures are ignored.
                    element.click()
                    element.send_keys(Keys.CONTROL + "a")  # Select all text in input
                    element.send_keys(Keys.DELETE)
                    element.clear()
                except Exception as e:
                    pass
                element.send_keys(value)
                # send enter key to the last element
                if i == len(self.elements_and_texts) - 1:
                    element.send_keys(Keys.RETURN)
                    # Give the submitted form time to navigate/respond.
                    time.sleep(3)
                i += 1
            result = f"Sent input to element and pressed Enter. Current URL is {wd.current_url} To further analyze the page, output '[send screenshot]' command."
        except Exception as e:
            result = str(e)

        remove_highlight_and_labels(wd)

        set_web_driver(wd)

        return result
BrowsingAgent/tools/SolveCaptcha.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import time

from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import presence_of_element_located, \
    frame_to_be_available_and_switch_to_it
from selenium.webdriver.support.wait import WebDriverWait

from agency_swarm.tools import BaseTool
from .util import get_b64_screenshot, remove_highlight_and_labels
from .util.selenium import get_web_driver
from agency_swarm.util import get_openai_client


class SolveCaptcha(BaseTool):
    """
    This tool asks a human to solve captcha on the current webpage. Make sure that captcha is visible before running it.
    """

    def run(self):
        """Attempt to solve a visible reCAPTCHA on the current page.

        Strategy: (1) click the checkbox in the reCAPTCHA anchor iframe and
        hope it passes without a challenge; (2) otherwise switch to the image
        challenge iframe and, for up to 5 attempts, screenshot each tile and
        ask GPT-4o which tiles match the instruction, clicking accordingly.
        Returns a success or failure message string.
        """
        wd = get_web_driver()

        try:
            # Enter the anchor iframe that hosts the "I'm not a robot" checkbox.
            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@title='reCAPTCHA']"))
            )

            element = WebDriverWait(wd, 3).until(
                presence_of_element_located((By.ID, "recaptcha-anchor"))
            )
        except Exception as e:
            return "Could not find captcha checkbox"

        try:
            # Scroll the element into view
            wd.execute_script("arguments[0].scrollIntoView(true);", element)
            time.sleep(1)  # Give some time for the scrolling to complete

            # Click the element using JavaScript
            wd.execute_script("arguments[0].click();", element)
        except Exception as e:
            return f"Could not click captcha checkbox: {str(e)}"

        try:
            # Now check if the reCAPTCHA is checked
            WebDriverWait(wd, 3).until(
                lambda d: d.find_element(By.CLASS_NAME, "recaptcha-checkbox").get_attribute(
                    "aria-checked") == "true"
            )

            return "Success"
        except Exception as e:
            # Checkbox did not turn green — an image challenge was presented.
            pass

        wd.switch_to.default_content()

        client = get_openai_client()

        # Enter the image-challenge iframe.
        WebDriverWait(wd, 10).until(
            frame_to_be_available_and_switch_to_it(
                (By.XPATH, "//iframe[@title='recaptcha challenge expires in two minutes']"))
        )

        time.sleep(2)

        attempts = 0
        while attempts < 5:
            tiles = wd.find_elements(By.CLASS_NAME, "rc-imageselect-tile")

            # filter out tiles with rc-imageselect-dynamic-selected class
            tiles = [tile for tile in tiles if
                     not tile.get_attribute("class").endswith("rc-imageselect-dynamic-selected")]

            # Build the multimodal message: a numbered label + screenshot per tile.
            image_content = []
            i = 0
            for tile in tiles:
                i += 1
                screenshot = get_b64_screenshot(wd, tile)

                image_content.append(
                    {
                        "type": "text",
                        "text": f"Image {i}:",
                    }
                )
                image_content.append(
                    {
                        "type": "image_url",
                        "image_url":
                            {
                                "url": f"data:image/jpeg;base64,{screenshot}",
                                "detail": "high",
                            }
                    },
                )
            # highlight all titles with rc-imageselect-tile class but not with rc-imageselect-dynamic-selected
            # wd = highlight_elements_with_labels(wd, 'td.rc-imageselect-tile:not(.rc-imageselect-dynamic-selected)')

            # screenshot = get_b64_screenshot(wd, wd.find_element(By.ID, "rc-imageselect"))

            task_text = wd.find_element(By.CLASS_NAME, "rc-imageselect-instructions").text.strip().replace("\n",
                                                                                                           " ")

            # "once there are none left" marks the dynamic challenge variant,
            # where clicked tiles are replaced and Verify is pressed last.
            continuous_task = 'once there are none left' in task_text.lower()

            # Rewrite the on-screen instruction into a prompt where "0" means
            # "no matching tiles / press verify".
            task_text = task_text.replace("Click verify", "Output 0")
            task_text = task_text.replace("click skip", "Output 0")
            task_text = task_text.replace("once", "if")
            task_text = task_text.replace("none left", "none")
            task_text = task_text.replace("all", "only")
            task_text = task_text.replace("squares", "images")

            additional_info = ""
            if len(tiles) > 9:
                # More than 9 tiles means the 4x4 variant where tiles are
                # fragments of one larger image.
                additional_info = ("Keep in mind that all images are a part of a bigger image "
                                   "from left to right, and top to bottom. The grid is 4x4. ")

            messages = [
                {
                    "role": "system",
                    "content": f"""You are an advanced AI designed to support users with visual impairments.
                    User will provide you with {i} images numbered from 1 to {i}. Your task is to output
                    the numbers of the images that contain the requested object, or at least some part of the requested
                    object. {additional_info}If there are no individual images that satisfy this condition, output 0.
                    """.replace("\n", ""),
                },
                {
                    "role": "user",
                    "content": [
                        *image_content,
                        {
                            "type": "text",
                            "text": f"{task_text}. Only output numbers separated by commas and nothing else. "
                                    f"Output 0 if there are none."
                        }
                    ]
                }]

            response = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                max_tokens=1024,
                temperature=0.0,
            )

            message = response.choices[0].message
            message_text = message.content

            # check if 0 is in the message
            if "0" in message_text and "10" not in message_text:
                # Model says no tiles match: press Verify (or Skip) directly.
                # Find the button by its ID
                verify_button = wd.find_element(By.ID, "recaptcha-verify-button")

                verify_button_text = verify_button.text

                # Click the button
                wd.execute_script("arguments[0].click();", verify_button)

                time.sleep(1)

                try:
                    if self.verify_checkbox(wd):
                        return "Success. Captcha solved."
                except Exception as e:
                    print('Not checked')
                    pass

            else:
                # Parse the comma-separated tile numbers from the model reply.
                numbers = [int(s.strip()) for s in message_text.split(",") if s.strip().isdigit()]

                # Click the tiles based on the provided numbers
                for number in numbers:
                    wd.execute_script("arguments[0].click();", tiles[number - 1])
                    time.sleep(0.5)

                time.sleep(3)

                if not continuous_task:
                    # Static challenge: press Verify after one round of clicks.
                    # Find the button by its ID
                    verify_button = wd.find_element(By.ID, "recaptcha-verify-button")

                    verify_button_text = verify_button.text

                    # Click the button
                    wd.execute_script("arguments[0].click();", verify_button)

                    try:
                        if self.verify_checkbox(wd):
                            return "Success. Captcha solved."
                    except Exception as e:
                        pass
                else:
                    # Dynamic challenge: new tiles appeared — loop without
                    # pressing Verify and without counting an attempt.
                    continue

            # Only count attempts where an actual Verify press failed.
            if "verify" in verify_button_text.lower():
                attempts += 1

        wd = remove_highlight_and_labels(wd)

        wd.switch_to.default_content()

        # close captcha
        try:
            element = WebDriverWait(wd, 3).until(
                presence_of_element_located((By.XPATH, "//iframe[@title='reCAPTCHA']"))
            )

            # Click just above the iframe to dismiss the challenge overlay.
            wd.execute_script(f"document.elementFromPoint({element.location['x']}, {element.location['y']-10}).click();")
        except Exception as e:
            print(e)
            pass

        return "Could not solve captcha."

    def verify_checkbox(self, wd):
        """Return True if the reCAPTCHA checkbox is now checked.

        Switches back to the anchor iframe to inspect the checkbox; on
        failure, re-enters the challenge iframe so the solve loop can
        continue, and returns False.
        """
        wd.switch_to.default_content()

        try:
            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@title='reCAPTCHA']"))
            )

            WebDriverWait(wd, 5).until(
                lambda d: d.find_element(By.CLASS_NAME, "recaptcha-checkbox").get_attribute(
                    "aria-checked") == "true"
            )

            return True
        except Exception as e:
            wd.switch_to.default_content()

            WebDriverWait(wd, 10).until(
                frame_to_be_available_and_switch_to_it(
                    (By.XPATH, "//iframe[@title='recaptcha challenge expires in two minutes']"))
            )

            return False
BrowsingAgent/tools/WebPageSummarizer.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from selenium.webdriver.common.by import By

from agency_swarm.tools import BaseTool
from .util import get_web_driver, set_web_driver


class WebPageSummarizer(BaseTool):
    """
    This tool summarizes the content of the current web page, extracting the main points and providing a concise summary.
    """

    def run(self):
        """Summarize the visible text of the current page with gpt-3.5-turbo.

        Returns the model-generated summary string.
        """
        from agency_swarm import get_openai_client

        wd = get_web_driver()
        client = get_openai_client()

        # Visible text of the whole document body.
        content = wd.find_element(By.TAG_NAME, "body").text

        # Keep only the first 10000 whitespace-separated words (not
        # characters, as the original comment claimed) to bound prompt size.
        content = " ".join(content.split()[:10000])

        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Your task is to summarize the content of the provided webpage. The summary should be concise and informative, capturing the main points and takeaways of the page."},
                {"role": "user", "content": "Summarize the content of the following webpage:\n\n" + content},
            ],
            temperature=0.0,
        )

        return completion.choices[0].message.content

if __name__ == "__main__":
    wd = get_web_driver()
    wd.get("https://en.wikipedia.org/wiki/Python_(programming_language)")
    set_web_driver(wd)
    tool = WebPageSummarizer()
    print(tool.run())
BrowsingAgent/tools/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .Scroll import Scroll
2
+ from .ReadURL import ReadURL
3
+ from .SendKeys import SendKeys
4
+ from .ClickElement import ClickElement
5
+ from .GoBack import GoBack
6
+ from .SelectDropdown import SelectDropdown
7
+ from .SolveCaptcha import SolveCaptcha
8
+ from .ExportFile import ExportFile
9
+ from .WebPageSummarizer import WebPageSummarizer
10
+ from .SearchAndScrape import SearchAndScrape
BrowsingAgent/tools/__pycache__/ClickElement.cpython-311.pyc ADDED
Binary file (3.85 kB). View file
 
BrowsingAgent/tools/__pycache__/ClickElement.cpython-313.pyc ADDED
Binary file (3.5 kB). View file
 
BrowsingAgent/tools/__pycache__/ExportFile.cpython-311.pyc ADDED
Binary file (2.5 kB). View file
 
BrowsingAgent/tools/__pycache__/ExportFile.cpython-313.pyc ADDED
Binary file (2.18 kB). View file
 
BrowsingAgent/tools/__pycache__/GoBack.cpython-311.pyc ADDED
Binary file (1.3 kB). View file
 
BrowsingAgent/tools/__pycache__/GoBack.cpython-313.pyc ADDED
Binary file (1.15 kB). View file
 
BrowsingAgent/tools/__pycache__/ReadURL.cpython-311.pyc ADDED
Binary file (3.01 kB). View file
 
BrowsingAgent/tools/__pycache__/ReadURL.cpython-313.pyc ADDED
Binary file (2.72 kB). View file
 
BrowsingAgent/tools/__pycache__/Scroll.cpython-311.pyc ADDED
Binary file (2.81 kB). View file
 
BrowsingAgent/tools/__pycache__/Scroll.cpython-313.pyc ADDED
Binary file (2.49 kB). View file
 
BrowsingAgent/tools/__pycache__/SearchAndScrape.cpython-311.pyc ADDED
Binary file (2.05 kB). View file
 
BrowsingAgent/tools/__pycache__/SearchAndScrape.cpython-313.pyc ADDED
Binary file (3.7 kB). View file
 
BrowsingAgent/tools/__pycache__/SelectDropdown.cpython-311.pyc ADDED
Binary file (3.79 kB). View file
 
BrowsingAgent/tools/__pycache__/SelectDropdown.cpython-313.pyc ADDED
Binary file (3.35 kB). View file
 
BrowsingAgent/tools/__pycache__/SendKeys.cpython-311.pyc ADDED
Binary file (4.71 kB). View file
 
BrowsingAgent/tools/__pycache__/SendKeys.cpython-313.pyc ADDED
Binary file (4.23 kB). View file
 
BrowsingAgent/tools/__pycache__/SolveCaptcha.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
BrowsingAgent/tools/__pycache__/SolveCaptcha.cpython-313.pyc ADDED
Binary file (9.57 kB). View file
 
BrowsingAgent/tools/__pycache__/WebPageSummarizer.cpython-311.pyc ADDED
Binary file (2.48 kB). View file
 
BrowsingAgent/tools/__pycache__/WebPageSummarizer.cpython-313.pyc ADDED
Binary file (2.25 kB). View file
 
BrowsingAgent/tools/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (721 Bytes). View file
 
BrowsingAgent/tools/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (563 Bytes). View file
 
BrowsingAgent/tools/util/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .get_b64_screenshot import get_b64_screenshot
2
+ from .selenium import get_web_driver, set_web_driver
3
+ from .highlights import remove_highlight_and_labels, highlight_elements_with_labels
BrowsingAgent/tools/util/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (539 Bytes). View file
 
BrowsingAgent/tools/util/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (443 Bytes). View file
 
BrowsingAgent/tools/util/__pycache__/get_b64_screenshot.cpython-311.pyc ADDED
Binary file (581 Bytes). View file
 
BrowsingAgent/tools/util/__pycache__/get_b64_screenshot.cpython-313.pyc ADDED
Binary file (539 Bytes). View file
 
BrowsingAgent/tools/util/__pycache__/highlights.cpython-311.pyc ADDED
Binary file (5.89 kB). View file