import base64
import hashlib
import math
import os
from typing import Literal, Optional, Tuple

import joblib
from openai import OpenAI
from smolagents import tool


@tool
def download_and_get_path_for_provided_file(path: str) -> str:
    """
    Download and cache a GAIA benchmark attachment, returning its local path.

    The file is looked up in both the "test" and "validation" splits of the
    gaia-benchmark/GAIA dataset; the first split that has it wins.

    Args:
        path (str): File name of the attachment (e.g. "abc123.mp3").

    Returns:
        str: Local filesystem path of the cached file, or None when the file
            is not present in either split.
    """
    from huggingface_hub import hf_hub_download

    for split in ("test", "validation"):
        try:
            file_path = hf_hub_download(
                repo_id="gaia-benchmark/GAIA",
                filename=f"2023/{split}/{path}",
                repo_type="dataset",
                token=os.environ["HF_TOKEN"],
            )
            if file_path:
                return file_path
        except Exception as exc:
            # File absent from this split (or transient hub error): log and
            # try the next split rather than failing the whole lookup.
            print(exc)
    return None


@tool
def extract_text_from_audio(file_path: str) -> str:
    """
    Extract and return text transcription from an audio file given its path.

    The transcription is cached in a sidecar file ("<file_path>.transcript")
    so repeated calls for the same audio do not re-hit the OpenAI API.

    Args:
        file_path (str): Path to the audio file to be transcribed.

    Returns:
        str: The extracted text content from the audio file.

    Examples:
        >>> extract_text_from_audio("meeting_recording.wav")
        "Hello team, welcome to our weekly meeting..."
        >>> extract_text_from_audio("/path/to/audio/interview.mp3")
        "Could you please introduce yourself and your background?"
    """
    # NOTE(review): the previous version dumped the cache onto file_path
    # itself, destroying the original audio; a sidecar path avoids that.
    cache_path = f"{file_path}.transcript"
    try:
        return joblib.load(cache_path)
    except Exception:
        pass  # no cache yet (or unreadable) -> transcribe below

    client = OpenAI()
    # Context manager guarantees the audio handle is closed after the call.
    with open(file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="gpt-4o-transcribe",
            file=audio_file,
            response_format="text",
        )
    joblib.dump(transcription, cache_path)
    return transcription


@tool
def describe_image(request: str, file_path: str) -> str:
    """
    Extract and return the requested information from an image given its path.

    Answers are cached per (image, request) pair in a sidecar file so the
    same question about the same image is only sent to the API once.

    Args:
        request: The information to retrieve from the image. The request must
            be simple, short and precise.
        file_path (str): Path to the image file to analyse.

    Returns:
        str: The extracted information from the image.

    Examples:
        >>> describe_image("how many birds are in the picture", "underwater_picture.jpg")
        "There are 2 birds depicted in an frame placed underwater"
        >>> describe_image("what is the position of the black queen?","chess_board.png")
        "Qd3"
    """
    # Key the cache on the request too: the previous version cached only per
    # file, so a second, different question returned the first answer (and it
    # also overwrote the image file with the pickled cache).
    request_digest = hashlib.md5(request.encode("utf-8")).hexdigest()
    cache_path = f"{file_path}.{request_digest}.cache"
    try:
        return joblib.load(cache_path)
    except Exception:
        pass  # no cached answer for this (image, request) pair

    client = OpenAI()

    # Encode the image as a Base64 data URL for the vision request.
    with open(file_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    response = client.responses.create(
        model="gpt-4.1",
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": request},
                    {
                        "type": "input_image",
                        "image_url": f"data:image/jpeg;base64,{base64_image}",
                    },
                ],
            }
        ],
    )
    joblib.dump(response.output_text, cache_path)
    return response.output_text


@tool
def get_transcript_from_youtube_file_id(file_id: str) -> str:
    """
    Retrieve the transcript for a YouTube video given its id.

    Args:
        file_id (str): The YouTube video ID (the alphanumeric string that
            appears after 'v=' in a YouTube URL, e.g., 'dQw4w9WgXcQ').

    Returns:
        str: The transcript content for the specified video: transcript
            segments with timestamps.
    """
    from youtube_transcript_api import YouTubeTranscriptApi

    ytt_api = YouTubeTranscriptApi()
    return ytt_api.fetch(file_id)


@tool
def parse_python_file(path: str) -> str:
    """
    Read and return the contents of a Python file from its path.

    Args:
        path (str): The file path to the Python file to be read.

    Returns:
        str: The complete contents of the Python file as a string.
    """
    with open(path, "r") as py_file:
        return py_file.read()


@tool
def parse_pdf_file(path: str) -> str:
    """
    Read and return the text contents of a PDF file from its path.

    Args:
        path (str): The file path to the PDF file to be read.

    Returns:
        str: The extracted text of every page, each followed by a newline, or
            an error message when the path does not end with ".pdf".
    """
    from pypdf import PdfReader

    if not path.endswith(".pdf"):
        return "file does not end with .pdf"
    reader = PdfReader(path)
    # Bug fix: the previous loop indexed reader.pages[0] on every iteration,
    # so only the first page was ever extracted.
    return "".join(page.extract_text() + "\n" for page in reader.pages)


class TestAgent:
    """smolagents CodeAgent wrapper configured with the GAIA tool set."""

    def __init__(self):
        # Code agent, model backend and basic tools from smolagents.
        from smolagents import (
            CodeAgent,
            DuckDuckGoSearchTool,
            FinalAnswerTool,
            OpenAIServerModel,
            Tool,
            VisitWebpageTool,
        )
        # Additional tool from langchain
        # @ https://docs.langchain.com/oss/python/integrations/tools
        from langchain_community.agent_toolkits.load_tools import load_tools

        wikipedia_tool = Tool.from_langchain(load_tools(["wikipedia"])[0])
        wikipedia_tool.top_k_results = 3

        model = OpenAIServerModel(model_id="gpt-4.1-mini")

        # Instantiate the agent with the local @tool functions plus the
        # built-in search/browse tools and the langchain wikipedia tool.
        self.agent = CodeAgent(
            tools=[
                download_and_get_path_for_provided_file,  # fetch attached GAIA file
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
                wikipedia_tool,
                get_transcript_from_youtube_file_id,
                parse_python_file,
                describe_image,
                extract_text_from_audio,
                parse_pdf_file,
                FinalAnswerTool(),
            ],
            additional_authorized_imports=["pandas", "markdownify", "requests", "chess", "os"],
            model=model,
            max_steps=6,
            planning_interval=3,
            verbosity_level=0,
            use_structured_outputs_internally=True,
        )

        # Guidance appended to the system prompt (wording taken from the GAIA
        # paper); kept byte-identical to preserve agent behavior.
        prompt = """\n\n It is very important to remember the foillowing: You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \n\n Now it's your turn. """
        self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its final answer."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Agent returning his answer: {answer}")
        return answer