Spaces:
Runtime error
Runtime error
| import os | |
| from bs4 import BeautifulSoup | |
| from dotenv import load_dotenv | |
| from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonInterpreterTool, tool | |
| import requests | |
# Load variables from a local .env file (if any) into the process environment
# before reading configuration from it.
load_dotenv()

# Base URL used by fetch_file() to build "<DEFAULT_URL>/files/<task_id>";
# None when the environment variable is not set.
DEFAULT_URL = os.environ.get("DEFAULT_URL")
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Page text truncated to the first 3000 characters ("" on failure)
            - "url": The original URL
            - "status": "Success (<HTTP status code>)" or "Error: <exception message>"
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # When the Content-Type header carries no charset, requests decodes
        # text/* bodies as ISO-8859-1, which garbles UTF-8 pages. Fall back to
        # the encoding detected from the body in that case.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = response.apparent_encoding

        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        short_text = text[:3000]  # cap the amount of text handed back to the caller
        print(f"Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        # Deliberately broad: every failure (network, HTTP status, parsing) is
        # reported through "status" instead of being raised to the caller.
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }
def fetch_file(task_id: str) -> dict:
    """
    Fetches a file from the task Api.

    Args:
        task_id: The ID of the task to fetch.

    Returns:
        dict: A dictionary containing:
            - "task_id": The task ID that was requested
            - "content": The first 5000 characters of the response body ("" on failure)
            - "status": "Success (<HTTP status code>)" or "Error: <message>"
    """
    # Without a base URL the request target would be "None/files/<id>" and the
    # caller would only see an opaque "Invalid URL" error; report it explicitly.
    if not DEFAULT_URL:
        return {
            "task_id": task_id,
            "content": "",
            "status": "Error: DEFAULT_URL is not configured",
        }

    # Strip trailing slashes so a base like "https://host/" does not yield "//files/".
    url = f"{DEFAULT_URL.rstrip('/')}/files/{task_id}"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return {
            "task_id": task_id,
            "content": response.text[:5000],
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        # Deliberately broad: failures are reported through "status", not raised.
        return {
            "task_id": task_id,
            "content": "",
            "status": f"Error: {e}",
        }
class Agent:
    """Question-answering wrapper around a smolagents ``CodeAgent``.

    The wrapped agent is given web search, a Python interpreter, and the
    ``visit_webpage`` / ``fetch_file`` helpers as tools.
    """

    def __init__(self, model):
        """
        Builds the underlying CodeAgent.

        Args:
            model: Model object passed unchanged to ``CodeAgent(model=...)``.
        """
        # Local import: ``Tool`` is not among the names imported at file level.
        from smolagents import Tool

        # CodeAgent only accepts Tool instances. Wrap the helper functions with
        # tool() unless they already are tools (e.g. decorated at definition).
        custom_tools = [
            fn if isinstance(fn, Tool) else tool(fn)
            for fn in (visit_webpage, fetch_file)
        ]
        self.agent = CodeAgent(
            model=model,
            tools=[DuckDuckGoSearchTool(), PythonInterpreterTool(), *custom_tools],
            additional_authorized_imports=['pandas', 'numpy', 'csv', 'PIL', 're'],
            max_steps=10,
        )

    def __call__(self, question: str, taskid: str):
        """
        Runs the agent on a single question.

        Args:
            question: The question to answer.
            taskid: Task ID shown in the prompt as the argument for fetch_file.

        Returns:
            Whatever ``self.agent.run(...)`` returns for the built prompt.
        """
        prompt_lines = (
            "You will be given a question.",
            "Answer it as concise as possible for example:",
            "What is capital of Poland?",
            "Warsaw",
            "If you don't know the answer, just say \"I don't know\".",
            "You can use the following tools:",
            "- DuckDuckGoSearchTool: for searching the web...",
            "...",
            "Example usage:",
            "web_search('Your search')",
            "- PythonInterpreterTool(): for calculations or parsing text/numbers.",
            "- visit_webpage(): for opening URLs and reading page content.",
            "Answer in max 10 steps.",
            # !r renders the ID as a quoted literal so the suggested call is valid Python.
            f"If you need to fetch a file, use fetch_file({taskid!r})",
            # The question appears exactly once; it is not appended again below.
            f"The question is: {question}",
        )
        # Kept as an attribute so code that inspects the last prompt still works.
        self.agent_prompt = "\n".join(prompt_lines)
        return self.agent.run(self.agent_prompt)