import io
import os
import re
import sys
from typing import List, Callable, Any
import openai
import pandas as pd
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
from langchain_community.document_loaders import WebBaseLoader, ImageCaptionLoader, WikipediaLoader, ArxivLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_core.tools import tool
from langchain_text_splitters import CharacterTextSplitter
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
load_dotenv()
@tool(description="Multiply two integers and return the result")
def multiply(a: int, b: int) -> int:
    """Return the product of the two integers."""
    product = a * b
    return product
@tool(description="Add two integers and return the result")
def add(a: int, b: int) -> int:
    """Return the sum of the two integers."""
    total = a + b
    return total
@tool(description="Subtract the second integer from the first and return the result")
def subtract(a: int, b: int) -> int:
    """Return the difference a - b."""
    difference = a - b
    return difference
@tool(
    description="Divide the first integer by the second and return the result; raises an error if the second integer is zero")
def divide(a: int, b: int) -> float:
    """Return a divided by b as a float.

    Raises:
        ValueError: If b is zero.
    """
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")
@tool(description="Return the remainder of dividing the first integer by the second")
def modulus(a: int, b: int) -> int:
    """Return the remainder of a divided by b."""
    remainder = a % b
    return remainder
@tool(description="""
Searches for a Wikipedia articles using the provided query and returns the content of the corresponding Wikipedia pages.
Args:
query (str): The search term to look up on Wikipedia.
Returns:
str: The text content of the Wikipedia articles related to the query.
""")
def wiki_search(query: str) -> str:
    """Load up to two Wikipedia pages matching the query and join their text."""
    # Trace tool invocation for debugging.
    print("wiki_search called with:", query)
    pages = WikipediaLoader(query=query, load_max_docs=2).load()
    # Separate each article body with a visual divider.
    sections = [f'\n{page.page_content}\n' for page in pages]
    return "\n\n---\n\n".join(sections)
@tool(description="""
Fetches raw HTML content of a web page.
Args:
url: the webpage url
Returns:
str: The combined raw text content of the webpage
""")
def visit_webpage(url: str) -> str:
    """Fetch a web page and return up to the first 5000 characters of its body.

    Args:
        url: The webpage URL to fetch.

    Returns:
        The (truncated) response text, or an error-message string on failure.
    """
    try:
        response = requests.get(url, timeout=5)
        # Surface HTTP errors (4xx/5xx) through the same error-string path
        # instead of returning an error page's body as if it were content.
        response.raise_for_status()
        # Truncate so the tool output stays within a manageable context size.
        return response.text[:5000]
    except Exception as e:
        return f"[ERROR fetching {url}]: {str(e)}"
@tool(description="""
Performs a web search using the given query, downloads the content of two relevant web pages,
and returns their combined content as a raw string.
This is useful when the task requires analysis of web page content, such as retrieving poems,
changelogs, or other textual resources.
Args:
query (str): The search query.
Returns:
str: The combined raw text content of the two retrieved web pages.
""")
def duckduck_websearch(query: str) -> str:
    """Search DuckDuckGo, fetch the top two result pages, and return their text.

    Args:
        query: The search query.

    Returns:
        Cleaned, combined page text, or an error-message string on failure
        (consistent with the other tools in this module).
    """
    try:
        search_engine = DuckDuckGoSearchResults(output_format="list", num_results=2)
        results = search_engine.invoke({"query": query})
        page_urls = [result["link"] for result in results]
        docs = WebBaseLoader(web_paths=page_urls).load()
        # Cap each page at 15000 characters to bound the output size.
        combined_text = "\n\n".join(doc.page_content[:15000] for doc in docs)
        # Collapse runs of blank lines and long horizontal whitespace,
        # then strip once at the end (the original stripped twice).
        cleaned_text = re.sub(r'\n{3,}', '\n\n', combined_text)
        cleaned_text = re.sub(r'[ \t]{6,}', ' ', cleaned_text)
        return cleaned_text.strip()
    except Exception as e:
        return f"Error during web search: {str(e)}"
@tool(description="""
Splits text into chunks using LangChain's CharacterTextSplitter.
Args:
text: A string of text to split.
Returns:
List[str]: a list of split text
""")
def text_splitter(text: str) -> List[str]:
    """Split text into ~450-character chunks with a 10-character overlap."""
    chunker = CharacterTextSplitter(chunk_size=450, chunk_overlap=10)
    chunks = chunker.split_text(text)
    return chunks
@tool(description="""
First download the file, then read its content
Args:
task_id: the task_id
Returns:
str: the file content
""")
def read_file(task_id: str) -> str:
    """Download the file attached to a task and return it as text.

    Args:
        task_id: Identifier of the task whose file should be fetched.

    Returns:
        The decoded file content, or an error-message string on failure.
    """
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail loudly on HTTP errors instead of "reading" an error page.
        r.raise_for_status()
        # Decode in memory rather than round-tripping through a temp file;
        # replace undecodable bytes rather than raising on binary content.
        return r.content.decode('utf-8', errors='replace')
    except Exception as e:
        return f"Error reading file: {str(e)}"
@tool(description="""
First download the excel file, then read its content
Args:
task_id: the task_id
Returns:
str: the content of excel file
""")
def excel_read(task_id: str) -> str:
    """Download a task's Excel file and summarize its contents.

    Args:
        task_id: Identifier of the task whose Excel file should be fetched.

    Returns:
        A text summary (shape, columns, describe() statistics), or an
        error-message string on failure.
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail loudly on HTTP errors instead of parsing an error page.
        r.raise_for_status()
        # Parse directly from memory; avoids leaving a temp file behind.
        df = pd.read_excel(io.BytesIO(r.content))
        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {', '.join(df.columns)}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
@tool(description="""
First download the csv file, then read its content
Args:
task_id: the task_id
Returns:
str: the content of csv file
""")
def csv_read(task_id: str) -> str:
    """Download a task's CSV file and summarize its contents.

    Args:
        task_id: Identifier of the task whose CSV file should be fetched.

    Returns:
        A text summary (shape, columns, describe() statistics), or an
        error-message string on failure.
    """
    try:
        file_url = f'{DEFAULT_API_URL}/files/{task_id}'
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail loudly on HTTP errors instead of parsing an error page.
        r.raise_for_status()
        # Parse directly from memory; avoids leaving a temp file behind.
        df = pd.read_csv(io.BytesIO(r.content))
        # Fixed copy-paste bug: this summary previously said "Excel file".
        result = (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {', '.join(df.columns)}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
@tool(description="""
Understand the content of the provided image
Args:
task_id: the task_id of the image file
Returns:
str: the image caption
""")
def image_caption(task_id: str) -> str:
    """Caption the image file attached to a task.

    Args:
        task_id: Identifier of the task whose image should be captioned.

    Returns:
        The generated caption text for the image.
    """
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    # ImageCaptionLoader downloads the image and produces one Document
    # per input; the caption is its page_content.
    loader = ImageCaptionLoader(images=[file_url])
    captions = loader.load()
    return captions[0].page_content
@tool(description="""
Analyzes a YouTube video from the provided URL and returns an answer
to the given question based on the analysis results.
Args:
youtube_url (str): The URL of the YouTube video, in the format
"https://www.youtube.com/...".
question (str): A question related to the content of the video.
Returns:
str: An answer to the question based on the video's content.
""")
def youtube_search(youtube_url: str, question: str) -> str:
    """Ask Gemini a question about a YouTube video and return its answer."""
    client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
    # Send the video reference and the question as two parts of one request.
    video_part = types.Part(file_data=types.FileData(file_uri=youtube_url))
    question_part = types.Part(text=question)
    response = client.models.generate_content(
        model='models/gemini-2.5-flash',
        contents=types.Content(parts=[video_part, question_part])
    )
    return response.text
@tool(description=
"""Search Arxiv for a query and return maximum 3 result.
Args:
query: The search query.""")
def arvix_search(query: str) -> str:
    """Search arXiv and return up to three truncated paper excerpts."""
    papers = ArxivLoader(query=query, load_max_docs=3).load()
    excerpts = []
    for paper in papers:
        # Keep only the first 1000 characters of each paper's text.
        excerpts.append(f'\n{paper.page_content[:1000]}\n')
    return "\n\n---\n\n".join(excerpts)
@tool(description= """
First download the mp3 file, then listen to it
Args:
task_id: the task_id
Returns:
str: the content of mp3 file
""")
def whisper_transcribe_api(task_id: str) -> str:
    """Download a task's mp3 file and transcribe it with OpenAI Whisper.

    Args:
        task_id: Identifier of the task whose audio file should be fetched.

    Returns:
        The transcript text, or an error-message string on failure.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")
    file_url = f'{DEFAULT_API_URL}/files/{task_id}'
    try:
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        # Fail loudly on HTTP errors instead of transcribing an error page.
        r.raise_for_status()
        # The Whisper API needs a real file handle, so keep the temp file.
        temp_path = 'temp.mp3'
        with open(temp_path, "wb") as fp:
            fp.write(r.content)
        with open(temp_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
        return transcript.text
    except Exception as e:
        return f"Error transcribing audio: {e}"
@tool(description="""
Execute Python code from a file identified by task_id and file_name.
Returns the numeric result if defined, otherwise stdout.
""")
def run_python_file(task_id: str, file_name: str) -> str:
    # Download the Python file attached to the task, exec it, and report its
    # module-level `result` variable (if defined) or its captured stdout.
    # SECURITY NOTE: this executes downloaded code with full privileges;
    # acceptable only because the scoring server is trusted.
    file_path = file_name
    buffer = io.StringIO()
    old_stdout = sys.stdout
    # Fresh namespace so the script behaves like a standalone __main__ module.
    ns = {"__builtins__": __builtins__, "__name__": "__main__"}
    try:
        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        r = requests.get(file_url, timeout=15, allow_redirects=True)
        if r.status_code != 200:
            return f"❌ Failed to download file: {r.status_code}"
        with open(file_path, "wb") as f:
            f.write(r.content)
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            code = f.read()
        # Redirect stdout so anything the script prints is captured.
        sys.stdout = buffer
        try:
            compiled = compile(code, file_path, "exec")
            exec(compiled, ns, ns)
        finally:
            # Always restore stdout, even if the executed script raises.
            sys.stdout = old_stdout
        # Prefer an explicit `result` variable over printed output.
        if "result" in ns:
            return str(ns["result"])
        else:
            output = buffer.getvalue().strip()
            return output or "No output produced."
    except Exception as e:
        # Prefer returning a computed result or any partial stdout if available
        try:
            sys.stdout = old_stdout
        except Exception:
            pass
        if "result" in ns:
            return str(ns["result"])
        output = buffer.getvalue().strip()
        if output:
            return output
        return f"❌ Error executing Python file: {e}"
    finally:
        # Ensure the downloaded code file is removed after execution
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except Exception:
            pass
# Registry of every tool exposed to the agent; consumed by the agent setup
# code that binds tools to the LLM.
TOOLS: List[Callable[..., Any]] = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    duckduck_websearch,
    arvix_search,
    wiki_search,
    visit_webpage,
    youtube_search,
    text_splitter,
    read_file,
    excel_read,
    csv_read,
    image_caption,
    whisper_transcribe_api,
    run_python_file
]