"""Full app.py with improved Excel-handling guidelines for GAIA agent.
Copy/paste into your Hugging Face Space.
"""

import os
import requests
import pandas as pd
import gradio as gr
import operator
from typing import Sequence, Annotated, TypedDict

from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_experimental.tools import PythonREPLTool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.tools.youtube.search import YouTubeSearchTool
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

# -----------------------  Agent Definition  ----------------------------------

class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Conversation history. The `operator.add` metadata is the reducer attached
    # to this key; nodes in this file return `{"messages": [new_message]}`,
    # i.e. a list that is meant to be concatenated onto the existing sequence
    # (see LangGraph's state-reducer documentation for the exact semantics).
    messages: Annotated[Sequence[BaseMessage], operator.add]

# System prompt prepended to the conversation on every model call (see
# `agent_node` inside `create_langgraph_agent`).
#
# It asks the model to finish with the literal marker "FINAL ANSWER:";
# `run_agent` relies on that exact marker to extract the answer, so the two
# must be kept in sync. The remaining text is guidance on which tool to use
# for spreadsheet/CSV files, web look-ups, video queries and code/math.
SYSTEM_PROMPT = (
    "You are a GAIA evaluation agent. For each question, think step‑by‑step, but only output the final answer with the template:\n"
    "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
    "Formatting rules: Your FINAL ANSWER must be a single number, a single short string, or a comma‑separated list, as the task dictates. No extra words.\n\n"
    "**IMPORTANT TOOL USAGE**:\n"
    "• You have a PythonREPL tool with pandas pre‑installed. If the task references an Excel / CSV file path (e.g. .xlsx, .xls, .csv), do the following:\n"
    "    1. Call PythonREPL and load the file with `pd.read_excel(<path>)` or `pd.read_csv(<path>)`.\n"
    "    2. Use pandas operations (sum, mean, filtering etc.) to compute the required value.\n"
    "    3. Return the numeric/string result in the FINAL ANSWER template.\n\n"
    "• Use TavilySearchResults for web look‑ups, YouTubeSearchTool for video queries.\n"
    "• If the task involves code execution or math, use PythonREPL.\n"
)


def create_langgraph_agent():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``"agent"`` (one chat-model call with the tools
    bound) and ``"tools"`` (a ``ToolNode`` over the same tool list). The entry
    point is ``"agent"``; its outgoing edge is chosen by LangGraph's prebuilt
    ``tools_condition``, and ``"tools"`` always hands control back to
    ``"agent"``.

    Returns:
        The compiled graph returned by ``StateGraph.compile()``.

    Raises:
        Exception: Whatever the model or tool constructors raise (for example
            when credentials are missing). Only the optional file-management
            toolkit is treated as best-effort.
    """
    # Local import keeps this block self-contained; logging is stdlib.
    import logging

    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    # NOTE(security): PythonREPLTool runs model-generated Python in this
    # process, and the file toolkit below is rooted at the working directory.
    # Only deploy this in a sandboxed environment.
    tools = [
        TavilySearchResults(max_results=3),
        PythonREPLTool(),
        YouTubeSearchTool(),
    ]

    # Optional FileManagement toolkit: still best-effort, but the reason for a
    # failure is now logged instead of being silently discarded.
    try:
        from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
        tools.extend(FileManagementToolkit(root_dir=".").get_tools())
    except Exception:
        logging.getLogger(__name__).warning(
            "FileManagementToolkit unavailable; continuing without file tools.",
            exc_info=True,
        )

    llm_with_tools = llm.bind_tools(tools)

    def agent_node(state: AgentState):
        """Call the model on the system prompt plus the accumulated messages."""
        msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
        reply = llm_with_tools.invoke(msgs)
        # Returned as a one-element list so it is added to the state's history.
        return {"messages": [reply]}

    graph = StateGraph(AgentState)
    graph.add_node("agent", agent_node)
    graph.add_node("tools", ToolNode(tools))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", tools_condition)
    graph.add_edge("tools", "agent")

    return graph.compile()

# ------------------  Helper to run one question  -----------------------------

def _content_to_text(content) -> str:
    """Flatten a message ``content`` value into a plain string.

    Message content may be a plain string or a list of content blocks
    (strings or dicts carrying a ``"text"`` entry). Blocks without text are
    skipped; any other value is converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                pieces.append(block["text"])
        return "".join(pieces)
    return str(content)


def run_agent(agent_exec, question: str) -> str:
    """Run the agent on one question and return its final answer text.

    Args:
        agent_exec: Object exposing ``invoke(inputs, config=...)`` whose result
            is a mapping with a ``"messages"`` list (the compiled graph).
        question: The question text, sent as a single human message.

    Returns:
        The text after the last ``"FINAL ANSWER:"`` marker, stripped; the whole
        final message text when the marker is absent; or ``"Error: <exc>"``
        when the invocation fails. Failures are returned rather than raised so
        one bad question does not abort a batch run.
    """
    try:
        result = agent_exec.invoke(
            {"messages": [HumanMessage(content=question)]},
            config={"recursion_limit": 15},
        )
        # Normalise first: list-style content would otherwise break the
        # string handling below and violate the declared ``str`` return type.
        text = _content_to_text(result["messages"][-1].content)
    except Exception as e:
        return f"Error: {e}"

    # rpartition keeps only what follows the *last* marker, matching the
    # previous ``split(...)[-1]`` behaviour.
    _, marker, answer = text.rpartition("FINAL ANSWER:")
    return answer.strip() if marker else text

# ------------------  Evaluation & Submission  --------------------------------

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with the agent, and submit the results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, answers)`` tuple. ``status`` is a human-readable message;
        ``answers`` is a DataFrame of the submitted answers, or ``None`` when
        the run stopped before any answer was produced.
    """
    if not profile:
        return "Please login first.", None

    for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
        if not os.getenv(key):
            return f"Missing {key} env var.", None

    try:
        agent_exec = create_langgraph_agent()
    except Exception as e:
        return f"Init error: {e}", None

    questions_url = "https://agents-course-unit4-scoring.hf.space/questions"
    submit_url = "https://agents-course-unit4-scoring.hf.space/submit"

    try:
        response = requests.get(questions_url, timeout=20)
        # Without this, an HTTP error with a JSON body would be parsed as if
        # it were the question list.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Fetch error: {e}", None

    # Guard against a non-list payload (e.g. an error object): iterating a
    # dict would yield string keys and crash on ``.get`` below.
    if not isinstance(questions, list):
        return (
            f"Fetch error: unexpected response type {type(questions).__name__}.",
            None,
        )

    answers = [
        {
            "task_id": q["task_id"],
            "submitted_answer": run_agent(agent_exec, q["question"]),
        }
        for q in questions
        if isinstance(q, dict) and q.get("task_id") and q.get("question")
    ]

    # Nothing answerable came back; do not post an empty submission.
    if not answers:
        return "No valid questions received; nothing to submit.", None

    payload = {
        "username": profile.username,
        "agent_code": "HF_Space_Link",  # not required for scoring
        "answers": answers,
    }
    answers_df = pd.DataFrame(answers)

    try:
        response = requests.post(submit_url, json=payload, timeout=240)
        # Surface HTTP failures as a submit error instead of a bogus
        # "Score: N/A% (None/None)" status.
        response.raise_for_status()
        res = response.json()
        status = (
            f"Score: {res.get('score', 'N/A')}% ({res.get('correct_count')}/"
            f"{res.get('total_attempted')})\nMessage: {res.get('message', '')}"
        )
    except Exception as e:
        return f"Submit error: {e}", answers_df
    return status, answers_df

# -----------------------------  UI  -----------------------------------------

# The UI is assembled at import time so that `demo` exists as a module-level
# object; it is only launched by the `__main__` guard below.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Runner – Excel‑aware")
    # Login control; `run_and_submit_all` refuses to run without a profile.
    gr.LoginButton()
    btn = gr.Button("Run & Submit")
    # The two outputs match the (status, answers) tuple returned by
    # `run_and_submit_all`.
    out1 = gr.Textbox(label="Status", lines=4)
    out2 = gr.DataFrame(label="Answers", wrap=True)
    # No explicit `inputs` are wired. NOTE(review): the `profile` argument is
    # presumably injected by Gradio from the `gr.OAuthProfile` type hint —
    # confirm against the Gradio OAuth documentation.
    btn.click(fn=run_and_submit_all, outputs=[out1, out2])

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()