# type: ignore
"""WebInspector agent and the web-scraping tools it can call.

Each ``@function_tool`` below is exposed to the model as a callable tool.
The tool docstrings are deliberately short and left unchanged: the
``function_tool`` decorator uses them as the tool description sent to the
model, so they are runtime text rather than plain documentation.
"""
import asyncio
import re
from urllib.parse import urljoin

import requests
from agents import Agent, RunContextWrapper, function_tool
from bs4 import BeautifulSoup
from langchain_community.tools import DuckDuckGoSearchResults
from markdownify import markdownify
from requests.exceptions import RequestException

from model import get_model

# Upper bound (seconds) for every outbound HTTP request made by the tools.
# Without a timeout, ``requests`` may wait on an unresponsive server forever.
REQUEST_TIMEOUT_SECONDS = 10


def _fetch(url: str) -> requests.Response:
    """Issue a blocking GET for *url* and raise on HTTP error statuses.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx response.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response


# Returns the absolute URL of the page's ``og:image`` meta tag, the string
# "No OG image found" when the tag is absent or empty, or an error message
# string when anything fails (errors are reported to the model, not raised).
@function_tool
async def extract_og_image(url: str) -> str:
    """Extracts the Open Graph (OG) image from a given website URL."""
    try:
        # The HTTP call is blocking; run it in a worker thread so the event
        # loop stays responsive while waiting on the network.
        resp = await asyncio.to_thread(_fetch, url)
        soup = BeautifulSoup(resp.text, "html.parser")
        og_image = soup.find("meta", property="og:image")
        if og_image and og_image.get("content"):
            # The content may be a relative path; resolve it against the page.
            return urljoin(url, og_image["content"])
        return "No OG image found"
    except Exception as e:
        return f"Error extracting OG image: {e}"


# Returns a dict with three lists -- "meta_theme", "inline_styles" and
# "linked_css" -- or ``{"error": <message>}`` when anything fails.
@function_tool
def collect_theme_data(url: str) -> dict:
    """Collects raw theme-related data from a website."""
    try:
        resp = _fetch(url)
        soup = BeautifulSoup(resp.text, "html.parser")

        theme_data = {
            "meta_theme": [],
            "inline_styles": [],
            "linked_css": [],
        }

        # Meta theme-color (tags without a content value carry no colour
        # information, so they are skipped instead of recorded as None).
        for meta in soup.find_all("meta", attrs={"name": "theme-color"}):
            content = meta.get("content")
            if content:
                theme_data["meta_theme"].append(content)

        # Inline styles
        for tag in soup.find_all(style=True):
            theme_data["inline_styles"].append(tag["style"])

        # Linked CSS files
        for link in soup.find_all("link", rel="stylesheet"):
            href = link.get("href")
            if href:
                # Resolve relative stylesheet paths against the page URL.
                if not href.startswith(("http://", "https://")):
                    href = urljoin(url, href)
                theme_data["linked_css"].append(href)

        return theme_data
    except Exception as e:
        return {"error": str(e)}


# Returns the page converted to markdown with runs of 3+ newlines collapsed
# to a single blank line, or an error message string when the fetch or the
# conversion fails.
@function_tool
async def extract_text_tool(url: str) -> str:
    """Visits a webpage and returns its content as markdown."""
    try:
        # Blocking request is off-loaded to a thread; ``_fetch`` also applies
        # the shared timeout that this tool previously lacked.
        response = await asyncio.to_thread(_fetch, url)
        markdown_content = markdownify(response.text).strip()
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
        return markdown_content
    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"


# Returns whatever ``DuckDuckGoSearchResults(output_format="list").invoke``
# yields for the query, or an error message string on failure.
@function_tool
async def Web_search_tool(query: str):
    """Performs a web search using DuckDuckGo."""
    try:
        print("Searching the web for:", query)
        search = DuckDuckGoSearchResults(output_format="list")
        # ``invoke`` is synchronous; keep it off the event loop.
        results = await asyncio.to_thread(search.invoke, query)
        return results
    except Exception as e:
        print(e)
        return f"An unexpected error occurred: {str(e)}"


def webInspectorPrompt(context: RunContextWrapper, agent: Agent) -> str:
    """Return the static system prompt for the WebInspector agent.

    ``context`` and ``agent`` are accepted because this function is passed
    as the agent's ``instructions`` callable; neither is read here.
    """
    return """
    You are WebInspector Agent.
    Your role is to extract and analyze data from websites.
    You can use the available tools to load web pages and retrieve content such as:
    - Page text and headings
    - Colors (from inline styles, CSS, or computed values)
    - Links and metadata (title, description, keywords)
    - Layout or structural information (DOM hierarchy, tag types)

    Guidelines:
    - Always extract text in a clean and structured format.
    - If asked about styles (like colors, fonts), parse them from the HTML/CSS.
    - Provide clear summaries when possible, instead of raw HTML.
    - When following links, only fetch up to N pages to avoid overload.
    - If extraction fails, explain the reason.
    """


# NOTE(review): ``extract_og_image`` is defined above but is not registered
# in ``tools`` -- confirm whether that omission is intentional.
WebInspectorAgent = Agent(
    name="WebInspectorAgent",
    instructions=webInspectorPrompt,
    model=get_model('gemini-2.0-flash'),
    tools=[collect_theme_data, extract_text_tool, Web_search_tool],
)