File size: 2,643 Bytes
980a2ce | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | import re
import os
import requests
from smolagents import Tool
from smolagents.utils import truncate_content
# Base URL of the remote API. DownloadTaskAttachmentTool fetches task
# attachments from "<DEFAULT_API_URL>/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class DownloadTaskAttachmentTool(Tool):
    """Tool that downloads the file attached to a task and stores it locally.

    The attachment is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    saved as ``downloads/{task_id}.file`` relative to the current working
    directory.
    """

    name = "download_file"
    description = "Downloads the file attached to the task ID"
    inputs = {'task_id': {'type': 'string', 'description': 'The task id to download attachment from.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Create the tool; extra arguments are accepted but ignored."""
        super().__init__()
        # NOTE(review): presumably tells smolagents that no lazy setup() is
        # required before the first call -- confirm against the Tool base class.
        self.is_initialized = True

    def forward(self, task_id: str) -> str:
        """Download the file associated with the given task ID.

        Args:
            task_id: Identifier of the task whose attachment is fetched. It
                becomes part of the local file name, so it must be non-empty
                and must not contain a path separator.

        Returns:
            The relative path of the downloaded file
            (``downloads/{task_id}.file``).

        Raises:
            ValueError: If ``task_id`` is empty or contains ``/`` or ``\\``.
            Exception: Any error raised while requesting or writing the file
                is printed and then re-raised unchanged.
        """
        # The id is used verbatim in a file name; refuse anything that could
        # point outside the downloads directory.
        if not task_id or "/" in task_id or "\\" in task_id:
            raise ValueError(f"Invalid task id: {task_id!r}")

        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        local_file_path = f"downloads/{task_id}.file"
        # Stream into a temporary sibling first so an interrupted download
        # never leaves a truncated file at the final path.
        partial_file_path = f"{local_file_path}.part"
        try:
            # Create downloads directory if it doesn't exist
            os.makedirs("downloads", exist_ok=True)
            # Download the file in chunks to keep memory use bounded
            with requests.get(file_url, stream=True, timeout=15) as response:
                response.raise_for_status()
                with open(partial_file_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            # Publish the finished download under its final name
            os.replace(partial_file_path, local_file_path)
            return local_file_path
        except Exception as e:
            print(f"Error downloading file for task {task_id}: {e}")
            try:
                os.remove(partial_file_path)
            except OSError:
                # Nothing was written yet (or it is already gone); the
                # original error below is the one worth reporting.
                pass
            raise
class VisitWebpageTool(Tool):
    """Tool for visiting webpages and converting content to markdown."""

    name = "visit_webpage"
    description = "Visits a webpage and returns its content as markdown"
    inputs = {'url': {'type': 'string', 'description': 'The URL to visit'}}
    output_type = "string"

    def __init__(self, *args, max_output_length: int = 10000, **kwargs):
        """Create the tool.

        Args:
            max_output_length: Length limit passed to ``truncate_content``
                for the returned markdown. Defaults to 10000.

        Other positional and keyword arguments are accepted but ignored.
        """
        super().__init__()
        self.max_output_length = max_output_length
        # NOTE(review): presumably tells smolagents that no lazy setup() is
        # required before the first call -- confirm against the Tool base class.
        self.is_initialized = True

    def forward(self, url: str) -> str:
        """Visit a webpage and return its content as markdown.

        Args:
            url: The URL to fetch.

        Returns:
            The page converted to markdown, with runs of three or more
            newlines collapsed to a single blank line and the result
            truncated via ``truncate_content``. On failure an error message
            string is returned instead of raising, so the calling agent
            always receives text.
        """
        try:
            # Import here to avoid dependency issues
            from markdownify import markdownify

            # Fetch webpage content
            response = requests.get(url, timeout=20)
            response.raise_for_status()

            # Convert HTML to markdown and clean up
            content = markdownify(response.text).strip()
            content = re.sub(r"\n{3,}", "\n\n", content)

            # Truncate to avoid excessively long responses
            return truncate_content(content, self.max_output_length)
        except requests.exceptions.Timeout:
            return "Request timed out. Please check the URL or try again later."
        except Exception as e:
            # Deliberate best-effort: report the failure as text rather than
            # propagating it to the caller.
            return f"Error: {str(e)}"
|