"""File attachment management for GAIA benchmark.

Handles downloading and managing attachment files for questions.
"""

import os
import requests
from pathlib import Path
from typing import Optional, Dict, Any

# Root directory for downloaded attachments. The path is relative, so it is
# resolved against the process's current working directory.
ATTACHMENTS_DIR = Path("attachments")
# Created eagerly at import time; exist_ok=True makes repeated imports/runs a no-op.
ATTACHMENTS_DIR.mkdir(exist_ok=True)


def download_file(url: str, local_path: Path, timeout: int = 30) -> bool:
    """Download a file from URL to local path.

    The body is streamed into a temporary ``<name>.part`` file next to the
    destination and only moved onto ``local_path`` once the whole download
    has succeeded. A failed or interrupted download therefore never leaves
    a truncated file at ``local_path`` that a later existence check could
    mistake for a complete one.

    Args:
        url: The URL to download from
        local_path: Where to save the file
        timeout: Request timeout in seconds

    Returns:
        True if successful, False otherwise. Errors are reported with
        ``print`` and never propagated to the caller.
    """
    tmp_path = None
    try:
        tmp_path = local_path.parent / f"{local_path.name}.part"

        # Using the response as a context manager guarantees the streamed
        # connection is released even if writing fails part-way through.
        with requests.get(url, timeout=timeout, stream=True) as response:
            response.raise_for_status()

            # Ensure parent directory exists
            local_path.parent.mkdir(parents=True, exist_ok=True)

            # Download in chunks to handle large files
            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Publish the finished file in one step (overwrites any existing file).
        os.replace(tmp_path, local_path)
        return True
    except Exception as e:
        # Deliberately broad: callers rely on a boolean result, not exceptions.
        print(f"Failed to download {url}: {e}")
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Nothing was written, or the partial file is already gone.
                pass
        return False


def get_attachment_path(task_id: str, filename: str) -> Path:
    """Return the local storage location for a task's attachment.

    The per-task directory ``ATTACHMENTS_DIR/<task_id>`` is created as a
    side effect (including any missing parents) if it does not exist yet.

    Args:
        task_id: The task/question ID, used as the sub-directory name
        filename: The original filename of the attachment

    Returns:
        Path inside the task's directory where the file should be stored
    """
    directory = ATTACHMENTS_DIR.joinpath(task_id)
    directory.mkdir(parents=True, exist_ok=True)
    return directory.joinpath(filename)


def download_attachment(question_data: Dict[str, Any]) -> Optional[str]:
    """Download attachment for a question if it exists.

    A previously downloaded copy is reused when present. When the question
    carries no ``file_path`` URL, a fallback URL is constructed from the
    task ID and file name and tried instead.

    ``task_id`` and ``file_name`` are used to build a local filesystem path,
    so each must be a single plain path component; values containing
    directory separators or equal to ``..`` are refused so that the file
    cannot be written outside the attachments directory.

    Args:
        question_data: Dictionary containing question info including
                      task_id, file_name, file_path

    Returns:
        Local path to the downloaded file, or None if there is no
        attachment, the task ID is missing, the task ID or file name is
        not a safe path component, or the download failed.
    """
    task_id = question_data.get("task_id")
    file_name = question_data.get("file_name")
    file_path = question_data.get("file_path")

    if not file_name:
        return None

    # Without a task ID there is no directory to store the file under.
    if task_id is None:
        print(f"Cannot store attachment {file_name}: question has no task_id")
        return None

    task_id = str(task_id)
    file_name = str(file_name)

    # Both values become path components below; reject anything that is not
    # a bare name (e.g. "../x" or "a/b") to prevent path traversal.
    for component in (task_id, file_name):
        if Path(component).name != component or component == "..":
            print(f"Refusing unsafe attachment path component: {component!r}")
            return None

    # Check if file already exists locally
    local_path = get_attachment_path(task_id, file_name)
    if local_path.exists():
        return str(local_path)

    # If no file_path URL was provided, fall back to a constructed one
    if not file_path:
        # Try to construct a URL based on task_id and filename
        # This is a fallback for the GAIA benchmark API
        base_url = "https://agents-course-unit4-scoring.hf.space"
        file_path = f"{base_url}/files/{task_id}/{file_name}"
        print(f"No file_path provided, trying constructed URL: {file_path}")

    # Download the file
    if download_file(file_path, local_path):
        print(f"Downloaded {file_name} to {local_path}")
        return str(local_path)

    print(f"Failed to download attachment for {task_id}")
    return None


def prepare_question_with_attachments(question_data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of the question enriched with its downloaded attachment.

    If an attachment is obtained, the copy gains a ``local_file_path`` entry
    and its ``question`` text gets a bracketed note appended that names the
    file location and suggests a tool, chosen by the file's extension.
    Without an attachment the copy is returned unchanged.

    Args:
        question_data: Raw question data from API (not modified)

    Returns:
        Shallow copy of the question data, updated as described above
    """
    # Lower-cased extension -> (description of the file, tool instruction).
    hints_by_extension = {
        '.mp3': ("Audio file", "Use the file_read tool to access and transcribe it."),
        '.py': ("Python code file", "Use the file_read tool to read and execute it."),
        '.xlsx': ("Excel/spreadsheet file", "Use the file_read tool to load and analyze it."),
        '.xls': ("Excel/spreadsheet file", "Use the file_read tool to load and analyze it."),
        '.csv': ("Excel/spreadsheet file", "Use the file_read tool to load and analyze it."),
        '.png': ("Image file", "Use the read_image tool to analyze it."),
        '.jpg': ("Image file", "Use the read_image tool to analyze it."),
        '.jpeg': ("Image file", "Use the read_image tool to analyze it."),
        '.gif': ("Image file", "Use the read_image tool to analyze it."),
        '.txt': ("Text file", "Use the file_read tool to read it."),
    }
    # Used for any extension not listed above.
    fallback_hint = ("Attachment file", "Use the appropriate tool to read it.")

    # Work on a shallow copy so the caller's dictionary is left untouched.
    enriched = question_data.copy()

    attachment = download_attachment(question_data)
    if not attachment:
        return enriched

    enriched["local_file_path"] = attachment

    suffix = Path(question_data.get("file_name", "")).suffix.lower()
    label, instruction = hints_by_extension.get(suffix, fallback_hint)

    original_text = enriched.get("question", "")
    enriched["question"] = (
        f"{original_text}\n\n"
        f"[{label} is available at: {attachment}. {instruction}]"
    )
    return enriched


def cleanup_attachments():
    """Delete every downloaded attachment and recreate an empty root directory.

    Does nothing (and prints nothing) when the attachments directory does
    not exist.
    """
    import shutil

    if not ATTACHMENTS_DIR.exists():
        return

    # Remove the whole tree, then restore the empty root directory.
    shutil.rmtree(ATTACHMENTS_DIR)
    ATTACHMENTS_DIR.mkdir(exist_ok=True)
    print("Cleaned up all attachments")


# Manual smoke test: run this module directly to exercise the pipeline.
if __name__ == "__main__":
    # Minimal sample question with a text attachment
    sample = {
        "task_id": "test-123",
        "question": "What is in this file?",
        "file_name": "test.txt",
        "file_path": "https://example.com/test.txt",
    }

    outcome = prepare_question_with_attachments(sample)
    prepared_text = outcome.get('question')
    stored_at = outcome.get('local_file_path')
    print(f"Prepared question: {prepared_text}")
    print(f"Local path: {stored_at}")