File size: 5,268 Bytes
bfd7e4d
 
4868771
 
88a1dd9
 
 
4868771
2557915
4868771
 
662cd6f
33a25ca
635f2c2
4868771
b028a1b
2c1cc68
2557915
88a1dd9
bfd7e4d
88a1dd9
2557915
 
 
4868771
bfd7e4d
 
 
 
 
 
 
 
 
 
4868771
2c1cc68
bd8cd5d
bfd7e4d
662cd6f
 
7c9fdd8
 
 
 
 
b028a1b
bfd7e4d
2b8e507
 
2c1cc68
bfd7e4d
 
2a0a2a5
e6e5669
88a1dd9
4868771
bfd7e4d
 
 
bd8cd5d
2557915
 
2c1cc68
2557915
0bae48f
2557915
 
bfd7e4d
4868771
bfd7e4d
88a1dd9
bfd7e4d
4868771
bfd7e4d
4868771
 
 
bfd7e4d
 
 
 
4868771
bfd7e4d
4868771
 
 
bfd7e4d
4868771
bfd7e4d
 
 
4868771
 
 
 
bfd7e4d
4868771
bfd7e4d
 
4868771
 
bfd7e4d
4868771
bfd7e4d
4868771
 
bfd7e4d
 
 
 
 
 
4868771
 
bfd7e4d
 
4868771
 
 
 
bfd7e4d
4868771
bfd7e4d
 
4868771
 
 
bfd7e4d
4868771
bfd7e4d
4868771
 
bfd7e4d
4868771
bfd7e4d
 
 
 
4868771
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""Full app.py with improved Excel-handling guidelines for GAIA agent.
Copy/paste into your Hugging Face Space.
"""

import os
import requests
import pandas as pd
import gradio as gr
import operator
from typing import Sequence, Annotated, TypedDict

from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_experimental.tools import PythonREPLTool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.tools.youtube.search import YouTubeSearchTool
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph
from langgraph.prebuilt import ToolNode, tools_condition

# -----------------------  Agent Definition  ----------------------------------

class AgentState(TypedDict):
    """Graph state passed between nodes: the conversation's message list."""
    # The `operator.add` metadata is presumably consumed by LangGraph as the
    # reducer for this key, so a node returning {"messages": [msg]} extends the
    # existing sequence instead of replacing it — confirm against LangGraph docs.
    messages: Annotated[Sequence[BaseMessage], operator.add]

# System prompt sent as the first message of every LLM call made by the agent
# node. It fixes the "FINAL ANSWER:" output template (which run_agent parses)
# and tells the model which tool to use for spreadsheets, web look-ups, video
# queries and code/math. The text is runtime behavior: edit it deliberately.
SYSTEM_PROMPT = (
    "You are a GAIA evaluation agent. For each question, think step‑by‑step, but only output the final answer with the template:\n"
    "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
    "Formatting rules: Your FINAL ANSWER must be a single number, a single short string, or a comma‑separated list, as the task dictates. No extra words.\n\n"
    "**IMPORTANT TOOL USAGE**:\n"
    "• You have a PythonREPL tool with pandas pre‑installed. If the task references an Excel / CSV file path (e.g. .xlsx, .xls, .csv), do the following:\n"
    "    1. Call PythonREPL and load the file with `pd.read_excel(<path>)` or `pd.read_csv(<path>)`.\n"
    "    2. Use pandas operations (sum, mean, filtering etc.) to compute the required value.\n"
    "    3. Return the numeric/string result in the FINAL ANSWER template.\n\n"
    "• Use TavilySearchResults for web look‑ups, YouTubeSearchTool for video queries.\n"
    "• If the task involves code execution or math, use PythonREPL.\n"
)


def create_langgraph_agent():
    """Build and compile the tool-calling LangGraph agent.

    The graph has two nodes: ``agent`` calls the chat model (with the tools
    bound and ``SYSTEM_PROMPT`` prepended), and ``tools`` executes whatever
    tool calls the model requested. ``tools_condition`` decides after each
    ``agent`` step whether to visit ``tools``; every ``tools`` step loops back
    to ``agent``.

    Returns:
        The compiled graph. Invoke it with ``{"messages": [...]}``.

    Raises:
        Whatever the model/tool constructors raise (e.g. missing API keys);
        only the optional file-management toolkit is treated as best-effort.
    """
    import logging  # local import: keeps this block self-contained

    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    tools = [
        TavilySearchResults(max_results=3),
        PythonREPLTool(),
        YouTubeSearchTool(),
    ]

    # Optional FileManagement toolkit: the agent still works without it, so a
    # failure here must not abort start-up — but it should be visible in logs
    # rather than silently swallowed.
    try:
        from langchain_community.agent_toolkits.file_management.toolkit import FileManagementToolkit
        tools.extend(FileManagementToolkit(root_dir=".").get_tools())
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "FileManagementToolkit unavailable; continuing without file tools: %s",
            exc,
        )

    llm_with_tools = llm.bind_tools(tools)

    def agent_node(state: AgentState):
        """Run one LLM turn over the system prompt plus the history so far."""
        msgs = [SystemMessage(content=SYSTEM_PROMPT)] + list(state["messages"])
        reply = llm_with_tools.invoke(msgs)
        # Return a one-element list; the state's reducer appends it.
        return {"messages": [reply]}

    graph = StateGraph(AgentState)
    graph.add_node("agent", agent_node)
    graph.add_node("tools", ToolNode(tools))
    graph.set_entry_point("agent")
    graph.add_conditional_edges("agent", tools_condition)
    graph.add_edge("tools", "agent")

    return graph.compile()

# ------------------  Helper to run one question  -----------------------------

def run_agent(agent_exec, question: str) -> str:
    """Run the agent on one question and return the bare final answer.

    Args:
        agent_exec: Compiled graph exposing ``invoke(state, config=...)``.
        question: The question text, sent as a single human message.

    Returns:
        The text after the last ``FINAL ANSWER:`` marker in the final message,
        stripped of surrounding whitespace. If the marker is absent, the whole
        (stripped) message text. If anything raises, ``"Error: <exception>"``
        so that one bad question does not abort a batch run.
    """
    marker = "FINAL ANSWER:"
    try:
        result = agent_exec.invoke(
            {"messages": [HumanMessage(content=question)]},
            config={"recursion_limit": 15},
        )
        content = result["messages"][-1].content

        # Message content is usually a string, but may be a list of content
        # blocks (plain strings or dicts carrying a "text" field). Flatten it
        # so the function always honours its ``-> str`` contract.
        if isinstance(content, str):
            text = content
        else:
            pieces = []
            for block in content or []:
                if isinstance(block, str):
                    pieces.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    pieces.append(block["text"])
            text = "".join(pieces)

        # Take what follows the LAST marker, in case the template is echoed.
        _, found, tail = text.rpartition(marker)
        return tail.strip() if found else text.strip()
    except Exception as e:
        return f"Error: {e}"

# ------------------  Evaluation & Submission  --------------------------------

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer each with the agent, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, table)`` pair for the two UI outputs: ``status`` is a
        human-readable message (score summary or error description) and
        ``table`` is a DataFrame of the submitted answers, or ``None`` when no
        answers were produced.
    """
    if not profile:
        return "Please login first.", None

    for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
        if not os.getenv(key):
            return f"Missing {key} env var.", None

    try:
        agent_exec = create_langgraph_agent()
    except Exception as e:
        return f"Init error: {e}", None

    Q_URL = "https://agents-course-unit4-scoring.hf.space/questions"
    S_URL = "https://agents-course-unit4-scoring.hf.space/submit"

    try:
        response = requests.get(Q_URL, timeout=20)
        # Surface HTTP errors here instead of trying to parse an error body.
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Fetch error: {e}", None

    # An error payload (e.g. a JSON object) would otherwise be iterated as if
    # it were the question list and crash outside any try block.
    if not isinstance(questions, list):
        return "Fetch error: unexpected response format (expected a list of questions).", None

    answers = []
    for q in questions:
        # Skip malformed entries rather than failing the whole run.
        if not isinstance(q, dict):
            continue
        if not (q.get("task_id") and q.get("question")):
            continue
        answers.append({
            "task_id": q["task_id"],
            "submitted_answer": run_agent(agent_exec, q["question"]),
        })

    if not answers:
        # Nothing to score; avoid posting an empty submission.
        return "No valid questions were returned; nothing was submitted.", None

    payload = {
        "username": profile.username,
        "agent_code": "HF_Space_Link",  # not required for scoring
        "answers": answers,
    }

    try:
        response = requests.post(S_URL, json=payload, timeout=240)
        # Without this, an HTTP error body would be reported as "Score: N/A%".
        response.raise_for_status()
        res = response.json()
        status = (
            f"Score: {res.get('score', 'N/A')}% ({res.get('correct_count')}/"
            f"{res.get('total_attempted')})\nMessage: {res.get('message', '')}"
        )
        return status, pd.DataFrame(answers)
    except Exception as e:
        # Still show the answers so the (expensive) agent run is not lost.
        return f"Submit error: {e}", pd.DataFrame(answers)

# -----------------------------  UI  -----------------------------------------

# Build the Gradio page at import time. Components are created in display
# order: title, login button, run button, status box, answers table.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Runner – Excel‑aware")
    gr.LoginButton()
    btn = gr.Button("Run & Submit")
    out1 = gr.Textbox(label="Status", lines=4)
    out2 = gr.DataFrame(label="Answers", wrap=True)
    # No explicit inputs are wired: the handler's `profile` argument is
    # presumably injected by Gradio's login integration based on its type
    # annotation — confirm against the Gradio OAuth documentation.
    # The returned (status, table) pair maps onto out1 and out2 in order.
    btn.click(fn=run_and_submit_all, outputs=[out1, out2])

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()