Final_Assignment_Template / agent /file_manager.py
Niraya666's picture
Upload 2 files
70376f3 verified
"""File attachment management for GAIA benchmark.
Handles downloading and managing attachment files for questions.
"""
import os
import requests
from pathlib import Path
from typing import Optional, Dict, Any
# Root directory for downloaded attachments. The path is relative, so it is
# resolved against the current working directory of the process.
ATTACHMENTS_DIR = Path("attachments")
# Created at import time; exist_ok=True makes this a no-op when it already exists.
ATTACHMENTS_DIR.mkdir(exist_ok=True)
def download_file(url: str, local_path: Path, timeout: int = 30) -> bool:
    """Download a file from a URL to a local path.

    The response body is streamed into a temporary ``.part`` file next to
    ``local_path`` and moved into place only once the transfer has finished.
    A failed or interrupted download therefore never leaves a truncated file
    at ``local_path`` that a later existence check could mistake for a
    complete download.

    Args:
        url: The URL to download from.
        local_path: Where to save the file.
        timeout: Request timeout in seconds.

    Returns:
        True if successful, False otherwise. Failures are reported on stdout
        and never raised.
    """
    tmp_path = None
    try:
        local_path = Path(local_path)
        tmp_path = local_path.with_name(local_path.name + ".part")

        # The context manager releases the streamed connection on every path,
        # including HTTP errors and write failures.
        with requests.get(url, timeout=timeout, stream=True) as response:
            response.raise_for_status()

            # Ensure parent directory exists
            local_path.parent.mkdir(parents=True, exist_ok=True)

            # Download in chunks to handle large files
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Publish the finished file in a single rename.
        tmp_path.replace(local_path)
        return True
    except Exception as e:
        # Deliberately broad: callers rely on a boolean result, not exceptions.
        print(f"Failed to download {url}: {e}")
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                # Nothing was written, or the partial file is already gone.
                pass
        return False
def get_attachment_path(task_id: str, filename: str) -> Path:
    """Get the local path for an attachment.

    The per-task directory is created as a side effect. Both arguments come
    from remote question data, so the resulting location is checked to lie
    inside ``ATTACHMENTS_DIR`` before anything is created on disk.

    Args:
        task_id: The task/question ID.
        filename: The original filename.

    Returns:
        Path where the file should be stored (``ATTACHMENTS_DIR/task_id/filename``).

    Raises:
        ValueError: If ``task_id`` or ``filename`` (for example via ``..``
            segments or an absolute path) would place the task directory or
            the file outside ``ATTACHMENTS_DIR``.
    """
    root = ATTACHMENTS_DIR.resolve()
    task_dir = ATTACHMENTS_DIR / task_id
    target = task_dir / filename

    # Compare fully resolved paths so ".." segments, absolute names and
    # symlinks cannot point outside the attachments root.
    resolved_dir = task_dir.resolve()
    dir_inside_root = resolved_dir == root or root in resolved_dir.parents
    file_inside_root = root in target.resolve().parents
    if not (dir_inside_root and file_inside_root):
        raise ValueError(
            f"Attachment path escapes {ATTACHMENTS_DIR}: "
            f"task_id={task_id!r}, filename={filename!r}"
        )

    task_dir.mkdir(parents=True, exist_ok=True)
    return target
def download_attachment(question_data: Dict[str, Any]) -> Optional[str]:
    """Download attachment for a question if it exists.

    A file that is already present locally is reused without contacting the
    server. When the question carries no ``file_path`` URL, a URL is built
    from the task ID and file name as a fallback.

    Args:
        question_data: Dictionary containing question info including
            task_id, file_name, file_path.

    Returns:
        Local path to the downloaded file, or None if there is no attachment,
        the question has no task_id to store it under, or the download failed.
    """
    from urllib.parse import quote

    task_id = question_data.get("task_id")
    file_name = question_data.get("file_name")
    file_path = question_data.get("file_path")

    if not file_name:
        return None

    # Without a task ID there is no directory to store the file under;
    # report it instead of failing while building the local path.
    if not task_id:
        print(f"Cannot store attachment {file_name}: question has no task_id")
        return None

    # Check if file already exists locally (a directory does not count)
    local_path = get_attachment_path(task_id, file_name)
    if local_path.is_file():
        return str(local_path)

    # If no file_path URL provided, fall back to a constructed URL
    if not file_path:
        # Percent-encode both components so spaces, '#', '?' and similar
        # characters cannot change the meaning of the URL.
        # NOTE(review): confirm the /files/{task_id}/{file_name} route
        # against the scoring API documentation.
        base_url = "https://agents-course-unit4-scoring.hf.space"
        file_path = f"{base_url}/files/{quote(task_id)}/{quote(file_name)}"
        print(f"No file_path provided, trying constructed URL: {file_path}")

    # Download the file
    if download_file(file_path, local_path):
        print(f"Downloaded {file_name} to {local_path}")
        return str(local_path)

    print(f"Failed to download attachment for {task_id}")
    return None
def prepare_question_with_attachments(question_data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of the question annotated with its downloaded attachment.

    If the attachment can be fetched, the copy gains a ``local_file_path``
    entry and its ``question`` text gets a bracketed note telling the reader
    where the file is and which tool to use for it. The note is chosen by the
    lower-cased file extension. Without an attachment the copy is returned
    unchanged.

    Args:
        question_data: Raw question data from API.

    Returns:
        Updated question data with local file path and modified question text.
    """
    # Extension -> (file description, tool instruction) used to build the note.
    file_read_sheet = ("Excel/spreadsheet file", "Use the file_read tool to load and analyze it.")
    read_image = ("Image file", "Use the read_image tool to analyze it.")
    hints_by_extension = {
        ".mp3": ("Audio file", "Use the file_read tool to access and transcribe it."),
        ".py": ("Python code file", "Use the file_read tool to read and execute it."),
        ".xlsx": file_read_sheet,
        ".xls": file_read_sheet,
        ".csv": file_read_sheet,
        ".png": read_image,
        ".jpg": read_image,
        ".jpeg": read_image,
        ".gif": read_image,
        ".txt": ("Text file", "Use the file_read tool to read it."),
    }
    fallback_hint = ("Attachment file", "Use the appropriate tool to read it.")

    # Work on a shallow copy so the caller's dictionary is left alone.
    prepared = question_data.copy()

    local_path = download_attachment(question_data)
    if not local_path:
        return prepared

    prepared["local_file_path"] = local_path

    suffix = Path(question_data.get("file_name", "")).suffix.lower()
    description, instruction = hints_by_extension.get(suffix, fallback_hint)

    original_text = prepared.get("question", "")
    prepared["question"] = (
        f"{original_text}\n\n"
        f"[{description} is available at: {local_path}. {instruction}]"
    )
    return prepared
def cleanup_attachments():
    """Delete every downloaded attachment and leave an empty attachments directory."""
    from shutil import rmtree

    attachments_present = ATTACHMENTS_DIR.exists()
    if attachments_present:
        rmtree(ATTACHMENTS_DIR)

    # Recreate the root so later downloads have somewhere to go.
    ATTACHMENTS_DIR.mkdir(exist_ok=True)
    print("Cleaned up all attachments")
# Manual smoke test: runs only when this module is executed directly.
if __name__ == "__main__":
    # Sample question pointing at a placeholder URL.
    sample_question = {
        "task_id": "test-123",
        "question": "What is in this file?",
        "file_name": "test.txt",
        "file_path": "https://example.com/test.txt",
    }
    outcome = prepare_question_with_attachments(sample_question)
    for label, key in (("Prepared question", "question"), ("Local path", "local_file_path")):
        print(f"{label}: {outcome.get(key)}")