"""Question-answering agent built on smolagents, with web and file-fetch tools."""

import os

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonInterpreterTool, tool

load_dotenv()

# Base URL of the task API; read once at import time, after .env has been loaded.
DEFAULT_URL = os.getenv("DEFAULT_URL")

# Shared limits for the HTTP tools below.
_REQUEST_TIMEOUT_S = 10
_MAX_PAGE_CHARS = 3000
_MAX_FILE_CHARS = 5000

# Tags whose contents are code or styling rather than readable page text.
_NON_CONTENT_TAGS = ("script", "style", "noscript")

# Filled in with str.format() so that braces or newlines inside the question
# are inserted verbatim. The task id is rendered with !r so the model sees a
# valid Python call such as fetch_file('abc-123').
_PROMPT_TEMPLATE = """
You will be given a question. Answer it as concise as possible for example:
What is capital of Poland?
Warsaw
If you don't know the answer, just say "I don't know".

You can use the following tools:
- DuckDuckGoSearchTool: for searching the web...
  ...
  Example usage: web_search('Your search')
- PythonInterpreterTool(): for calculations or parsing text/numbers.
- visit_webpage(): for opening URLs and reading page content.
Answer in max {max_steps} steps.
If you need to fetch a file, use fetch_file({task_id!r})
The question is: {question}
"""


@tool
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Truncated page content (empty on failure)
            - "url": The original URL
            - "status": HTTP status or error info
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_S)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # get_text() would otherwise include inline JavaScript/CSS, which
        # eats into the character budget without adding readable content.
        for tag in soup(_NON_CONTENT_TAGS):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)
        short_text = text[:_MAX_PAGE_CHARS]
        print(f"✅ Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising,
        # so it can try another URL or strategy.
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }


@tool
def fetch_file(task_id: str) -> dict:
    """
    Fetches a file from the task Api.

    Args:
        task_id: The ID of the task to fetch.

    Returns:
        dict: A dictionary containing:
            - "task_id": The requested task ID
            - "content": Truncated response body as text (empty on failure)
            - "status": HTTP status or error info
    """
    if not DEFAULT_URL:
        # Without this guard the request would go to "None/files/<id>" and
        # fail with a confusing URL error.
        return {
            "task_id": task_id,
            "content": "",
            "status": "Error: DEFAULT_URL is not configured",
        }

    # Tolerate a trailing slash in the configured base URL.
    url = f"{DEFAULT_URL.rstrip('/')}/files/{task_id}"
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_S)
        response.raise_for_status()
        return {
            "task_id": task_id,
            "content": response.text[:_MAX_FILE_CHARS],
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        return {
            "task_id": task_id,
            "content": "",
            "status": f"Error: {e}",
        }


class Agent:
    """Callable wrapper around a smolagents ``CodeAgent`` with search, Python,
    web-page and task-file tools."""

    def __init__(self, model, max_steps: int = 10):
        """
        Build the underlying ``CodeAgent``.

        Args:
            model: Model object passed straight through to ``CodeAgent``.
            max_steps: Step limit given to ``CodeAgent`` and quoted in the
                prompt, so the two cannot drift apart.
        """
        self.max_steps = max_steps
        self.agent = CodeAgent(
            model=model,
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                visit_webpage,
                fetch_file,
            ],
            additional_authorized_imports=["pandas", "numpy", "csv", "PIL", "re"],
            max_steps=max_steps,
        )

    def __call__(self, question: str, taskid: str):
        """
        Run the agent on one question and return its answer.

        Args:
            question: The question to answer.
            taskid: Task identifier the agent may pass to ``fetch_file``.

        Returns:
            Whatever ``CodeAgent.run`` returns for the assembled prompt.
        """
        # The question is embedded once by the template; it is deliberately
        # not appended a second time.
        self.agent_prompt = _PROMPT_TEMPLATE.format(
            max_steps=self.max_steps,
            task_id=taskid,
            question=question,
        )
        return self.agent.run(self.agent_prompt)