File size: 8,553 Bytes
10e9b7d
 
eccf8e4
7d65c66
3c4371f
10e9b7d
f3c3f5b
cfde807
f3c3f5b
 
 
cfde807
f3c3f5b
e80aab9
3db6293
e80aab9
f3c3f5b
 
31243f4
f3c3f5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfde807
f3c3f5b
cfde807
 
 
 
 
 
 
 
f3c3f5b
 
31243f4
 
f3c3f5b
 
 
 
 
 
 
cfde807
 
4021bf3
cfde807
31243f4
f3c3f5b
31243f4
 
7d65c66
f3c3f5b
3c4371f
cfde807
 
 
 
 
7e4a06b
cfde807
 
3c4371f
7e4a06b
31243f4
 
e80aab9
31243f4
f3c3f5b
31243f4
cfde807
 
 
 
 
f3c3f5b
eccf8e4
f3c3f5b
7d65c66
31243f4
f3c3f5b
 
e80aab9
cfde807
f3c3f5b
 
 
 
 
 
 
 
 
31243f4
 
f3c3f5b
cfde807
f3c3f5b
cfde807
f3c3f5b
 
 
 
31243f4
f3c3f5b
 
 
cfde807
f3c3f5b
 
e80aab9
 
f3c3f5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
e80aab9
f3c3f5b
 
 
 
 
 
e80aab9
f3c3f5b
e80aab9
0ee0419
f3c3f5b
 
e80aab9
f3c3f5b
 
 
 
7e4a06b
e80aab9
31243f4
e80aab9
9088b99
7d65c66
e80aab9
31243f4
 
 
e80aab9
 
 
3c4371f
 
f3c3f5b
7d65c66
3c4371f
 
7d65c66
3c4371f
7d65c66
 
f3c3f5b
7d65c66
 
 
 
f3c3f5b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import os
import gradio as gr
import requests
import inspect
import pandas as pd

# Import necessary libraries for LangChain Agent
from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain_community.utilities import SerpAPIWrapper
from langchain.tools import Tool # Moved to top-level import for clarity and consistent access

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- LangChain Agent Definition ---
class GAIAAgent:
    """ReAct-style agent: a Hugging Face Inference endpoint LLM plus a SerpAPI web-search tool."""

    def __init__(self):
        print("GAIAAgent initialized using LangChain.")

        model_repo = "mistralai/Mistral-7B-Instruct-v0.3" 

        # Remote LLM served by the HF Inference API; the token is read from the
        # HUGGINGFACEHUB_API_TOKEN environment variable.
        self.llm = HuggingFaceEndpoint(
            endpoint_url=f"https://api-inference.huggingface.co/models/{model_repo}",
            temperature=0.1,
            max_new_tokens=512,
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        )

        # SerpAPIWrapper picks up SERPAPI_API_KEY from the environment on its own.
        self.serpapi_tool = SerpAPIWrapper()

        # The tool description is what tells the LLM when a web search is appropriate.
        search_tool = Tool(
            name="Serpapi Search",
            description="useful for when you need to answer questions about current events or facts. Input should be a search query.",
            func=self.serpapi_tool.run,
        )
        self.tools = [search_tool]

        # Standard ReAct prompt pulled from the LangChain hub.
        self.prompt = hub.pull("hwchase17/react")

        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        # Generous iteration / wall-clock budgets so complex GAIA questions can finish.
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=25,
            max_execution_time=180.0,
        )


    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its answer (or an error string)."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            answer = self.agent_executor.invoke({"input": question})["output"]
            print(f"Agent returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error during agent execution: {e}")
            # Surface a clean, self-describing message instead of propagating the exception.
            return f"Agent execution failed: {e}. Check tool outputs and LLM reasoning."

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None when
            running without a Hugging Face login.

    Returns:
        A (status_message, results_dataframe) tuple for the Gradio UI.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    # Robust username handling: fall back to a placeholder so the app still
    # runs locally without HF login instead of bailing out.
    current_username = "anonymous_user"
    if profile and profile.username:
        current_username = profile.username
        print(f"User logged in: {current_username}")
    else:
        print("User not logged in through Gradio OAuth, using default username for submission.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = GAIAAgent()
    except Exception as e:
        # Agent construction touches several external services; report which
        # secrets are likely missing rather than crashing the UI.
        error_message = f"Failed to initialize agent: {e}. Please ensure all required API keys (HUGGINGFACEHUB_API_TOKEN, SERPAPI_API_KEY, GOOGLE_API_KEY) are set in Hugging Face Space secrets, and model terms accepted."
        print(error_message)
        # Return a one-row DataFrame so the failure is visible in the Gradio table.
        return error_message, pd.DataFrame([{"Question ID": "N/A", "Question": "Agent Initialization Failed", "Agent Answer": str(e)}])

    try:
        # timeout so a hung scoring server cannot block the app indefinitely.
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions = response.json()  # The endpoint returns the question list directly.
        print(f"Fetched {len(questions)} questions.")
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch questions: {e}", pd.DataFrame()

    all_answers = []
    results_for_display = []

    for q_data in questions:
        q_id = q_data.get("task_id")  # Use 'task_id' as per the data
        q_text = q_data.get("question")
        if not q_id or not q_text:
            print(f"Skipping malformed question data: {q_data}")
            continue

        print(f"\n--- Processing Question ID: {q_id} ---")
        agent_answer = agent(q_text)  # GAIAAgent.__call__ never raises; it returns an error string.

        # Submission keys must match GAIA benchmark expectations exactly.
        all_answers.append({"task_id": q_id, "submitted_answer": agent_answer})
        results_for_display.append({"Question ID": q_id, "Question": q_text, "Agent Answer": agent_answer})

    results_df = pd.DataFrame(results_for_display)

    submission_data = {
        "answers": all_answers,
        "space_id": space_id,  # Include SPACE_ID for the leaderboard link
        "username": current_username,
        "agent_code": inspect.getsource(GAIAAgent),  # Agent code for debugging on the leaderboard
    }

    try:
        print(f"\nSubmitting {len(all_answers)} answers to {submit_url}...")
        submit_response = requests.post(submit_url, json=submission_data, timeout=120)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print("Submission successful!")
        print(f"Submission Result: {submission_result}")

        score = submission_result.get("score", "N/A")
        leaderboard_link = submission_result.get("leaderboard_link", "")
        # BUGFIX: only apply the numeric ".2f" format when the API actually
        # returned a number — formatting the "N/A" fallback string with ".2f"
        # raised ValueError and crashed an otherwise-successful run.
        if isinstance(score, (int, float)):
            score_display = f"{score:.2f}%"
        else:
            score_display = str(score)
        status_message = f"Evaluation complete! Your score: {score_display}\n"
        if leaderboard_link:
            status_message += f"Check the leaderboard: {leaderboard_link}\n"
        else:
            status_message += "No leaderboard link provided."

        return status_message, results_df

    except requests.exceptions.RequestException as e:
        error_message = f"Failed to submit answers: {e}"
        # Include the server's response body when available — it usually explains the rejection.
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse: {e.response.text}"
        print(error_message)
        return error_message, results_df

# --- Gradio UI: login button, run trigger, and two output widgets ---
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Unit 4: Agentic AI for GAIA Benchmark

    This Gradio app allows you to run your agent against the GAIA benchmark questions and submit your answers.
    Your goal is to modify the `GAIAAgent` class in `app.py` to achieve a score above 30%.
    """
    )
    # Hugging Face OAuth login; supplies the profile used for submission.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # run_and_submit_all returns (status string, results DataFrame), mapped in order below.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # NOTE(review): no `inputs=` declared — presumably Gradio auto-injects the
    # gr.OAuthProfile argument based on the function's annotation; confirm the
    # installed Gradio version supports this.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    # Startup diagnostics: report which HF Space environment variables are set
    # so local runs vs. Space deployments are easy to tell apart in the logs.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found...")
    # Blocks until the Gradio server is stopped.
    demo.launch()