Mouhamedamar committed on
Commit
bf26a74
·
verified ·
1 Parent(s): 2159633

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +583 -104
app.py CHANGED
@@ -2,100 +2,577 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from langchain_groq import ChatGroq
6
- from langchain_core.messages import HumanMessage
 
 
 
 
 
 
7
  from langchain_community.tools import WikipediaQueryRun
8
  from langchain_community.utilities import WikipediaAPIWrapper
9
  from langchain_tavily import TavilySearch
 
 
 
 
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
13
 
14
- # --- Simple Agent Definition ---
15
- class SimpleGAICAgent:
16
- def __init__(self):
17
- print("Initializing Simple GAIA Agent...")
18
- api_key = os.environ.get("GROQ_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  if not api_key:
20
- raise ValueError("GROQ_API_KEY not set")
21
 
22
- # Initialize LLM with Groq
23
- self.llm = ChatGroq(
24
- model="llama-3.1-8b-instant",
25
- api_key=api_key,
26
- temperature=0
27
  )
28
 
29
- # Initialize search tools
30
- try:
31
- self.web_search = TavilySearch(max_results=5)
32
- print("Tavily search initialized")
33
- except:
34
- self.web_search = None
35
- print("Tavily search not available")
36
 
37
- try:
38
- self.wikipedia = WikipediaQueryRun(
39
- api_wrapper=WikipediaAPIWrapper(top_k_results=5)
40
- )
41
- print("Wikipedia search initialized")
42
- except:
43
- self.wikipedia = None
44
- print("Wikipedia not available")
45
-
46
- print("Agent ready!")
 
 
 
 
 
 
 
 
 
47
 
48
- def search_web(self, query: str) -> str:
49
- """Search the web using Tavily"""
50
- if self.web_search:
51
- try:
52
- result = self.web_search.invoke(query)
53
- return result if result else "No results found"
54
- except Exception as e:
55
- return f"Search error: {e}"
56
- return "Web search not available"
57
 
58
- def search_wikipedia(self, query: str) -> str:
59
- """Search Wikipedia"""
60
- if self.wikipedia:
61
- try:
62
- result = self.wikipedia.invoke(query)
63
- return result if result else "No Wikipedia results found"
64
- except Exception as e:
65
- return f"Wikipedia error: {e}"
66
- return "Wikipedia not available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- def answer_question(self, question: str) -> str:
69
- """Answer a single question"""
70
- print(f"Answering: {question[:100]}...")
 
 
 
 
 
 
 
 
71
 
72
- # First, try to search for relevant information
73
- search_results = ""
74
- if self.web_search:
75
- search_results = self.search_web(question)
 
 
76
 
77
- # Prepare prompt for final answer
78
- prompt = f"""You are a precise AI assistant. Answer the following question with ONLY the exact answer, nothing else.
79
 
 
80
  Question: {question}
81
 
82
- {search_results}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- Answer (exact value only, no explanations):"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
 
 
 
 
 
 
 
 
 
 
86
  try:
87
- response = self.llm.invoke(prompt)
88
- answer = response.content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return answer
 
90
  except Exception as e:
91
- return f"Error: {e}"
92
-
93
- def __call__(self, item: dict) -> str:
94
- """Process a task item"""
95
- question = item.get("question", "")
96
- if not question:
97
- return "No question provided"
98
- return self.answer_question(question)
99
 
100
  # --- Gradio Interface Functions ---
101
  def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -112,10 +589,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
112
  questions_url = f"{api_url}/questions"
113
  submit_url = f"{api_url}/submit"
114
 
115
- # Initialize agent
116
  try:
117
- agent = SimpleGAICAgent()
118
  except Exception as e:
 
119
  return f"Error initializing agent: {e}", None
120
 
121
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
@@ -125,67 +602,69 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
125
  response = requests.get(questions_url, timeout=15)
126
  response.raise_for_status()
127
  questions_data = response.json()
128
- print(f"Fetched {len(questions_data)} questions")
 
 
129
  except Exception as e:
130
  return f"Error fetching questions: {e}", None
131
 
132
- # Process each question
133
  results_log = []
134
  answers_payload = []
 
135
 
136
- for i, item in enumerate(questions_data):
137
  task_id = item.get("task_id")
138
- question = item.get("question")
139
-
140
- print(f"\n[{i+1}/{len(questions_data)}] Task: {task_id}")
141
-
142
  try:
143
- answer = agent.answer_question(question)
144
- answers_payload.append({"task_id": task_id, "submitted_answer": answer})
145
- results_log.append({
146
- "Task ID": task_id,
147
- "Question": question[:100] + "..." if len(question) > 100 else question,
148
- "Answer": answer[:100] + "..." if len(answer) > 100 else answer
149
- })
150
- print(f"Answer: {answer[:100]}")
151
  except Exception as e:
152
- print(f"Error: {e}")
153
- results_log.append({"Task ID": task_id, "Question": question, "Answer": f"ERROR"})
154
 
155
  if not answers_payload:
156
- return "No answers produced", pd.DataFrame(results_log)
157
 
158
  # Submit answers
159
- submission_data = {
160
- "username": username.strip(),
161
- "agent_code": agent_code,
162
- "answers": answers_payload
163
- }
164
 
165
  try:
166
  response = requests.post(submit_url, json=submission_data, timeout=120)
167
  response.raise_for_status()
168
- result = response.json()
169
-
170
- status = (
171
  f"✅ Submission Successful!\n"
172
- f"User: {result.get('username')}\n"
173
- f"Score: {result.get('score', 0)}% ({result.get('correct_count', 0)}/20 correct)"
 
174
  )
175
- return status, pd.DataFrame(results_log)
176
  except Exception as e:
177
  return f"Submission failed: {e}", pd.DataFrame(results_log)
178
 
179
  # --- Gradio Interface ---
180
  with gr.Blocks() as demo:
181
- gr.Markdown("# 🦾 GAIA Agent Evaluator")
182
- gr.Markdown("Login and click 'Run' to evaluate your agent.")
183
-
 
 
 
 
 
 
 
 
 
184
  gr.LoginButton()
185
  run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
186
  status_output = gr.Textbox(label="Status", lines=5, interactive=False)
187
  results_table = gr.DataFrame(label="Results", wrap=True)
188
-
189
  run_button.click(
190
  fn=run_and_submit_all,
191
  outputs=[status_output, results_table]
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import re
6
+ from urllib.parse import urlparse
7
+ from typing import TypedDict, List, Optional, Annotated, Tuple, Union, Literal
8
+ from langgraph.graph import StateGraph, END
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, ToolMessage, BaseMessage
11
+ from langgraph.graph.message import add_messages
12
+ from langchain_core.tools import tool
13
  from langchain_community.tools import WikipediaQueryRun
14
  from langchain_community.utilities import WikipediaAPIWrapper
15
  from langchain_tavily import TavilySearch
16
+ from pydantic import BaseModel, Field
17
+ from langgraph.prebuilt import ToolNode
18
+ from langchain_core.prompts import ChatPromptTemplate
19
+ import operator
20
 
21
  # --- Constants ---
22
# Base URL of the scoring service that serves questions/files and accepts submissions.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Root folder (under the current working directory) holding per-task download folders.
TEMP_DIR_BASE = os.path.join(os.getcwd(), "temp_agent_files")
24
 
25
+ # --- Helper Functions ---
26
def get_task_temp_dir(task_id: str) -> str:
    """Return the scratch directory for ``task_id``, creating it if needed.

    The directory is ``TEMP_DIR_BASE/<task_id>``; an existing directory is
    reused unchanged.
    """
    scratch_dir = os.path.join(TEMP_DIR_BASE, task_id)
    os.makedirs(scratch_dir, exist_ok=True)
    return scratch_dir
31
+
32
def extract_youtube_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Recognised URL shapes:
      * ``youtube.com/watch?v=ID`` (``v`` may come after other query parameters)
      * ``youtube.com/embed/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``
      * ``youtu.be/ID``

    Args:
        url: The URL to inspect. Empty or ``None`` values are tolerated.

    Returns:
        The video ID, or ``None`` when ``url`` is empty or no ID is found.
    """
    if not url:
        return None
    # The optional "(?:[^#\s]*?&)?" group lets "v=" appear after other query
    # parameters, e.g. "watch?feature=share&v=ID".
    pattern = (
        r'(?:youtube\.com/(?:watch\?(?:[^#\s]*?&)?v=|embed/|shorts/|live/)'
        r'|youtu\.be/)([a-zA-Z0-9_-]+)'
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None
37
+
38
+ # --- Analysis Tools with Gemini ---
39
@tool
def analyze_youtube_video(url: str, question: str) -> str:
    """
    Analyze a YouTube video using Gemini 2.0 Flash Thinking.

    Args:
        url: The YouTube video URL
        question: Specific question about the video content

    Returns:
        Analysis of the video based on the provided question.
    """
    # NOTE(review): the docstring above is kept verbatim because it appears to
    # be surfaced to the LLM as the tool description. Only the URL *text* is
    # sent to the model below; no video data is downloaded or attached here.
    try:
        parsed_url = urlparse(url)
        if not all([parsed_url.scheme, parsed_url.netloc]):
            return "Please provide a valid video URL with http:// or https:// prefix."

        # Validate the host itself rather than searching the whole URL string,
        # so "https://evil.example/?next=youtube.com" is no longer accepted.
        host = (parsed_url.hostname or "").lower()
        is_youtube_host = (
            host in ("youtube.com", "youtu.be")
            or host.endswith(".youtube.com")
        )
        if not is_youtube_host:
            return "Only YouTube videos are supported at this time."

        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            return "Unable to perform analysis: Google API key not set. Get it from https://aistudio.google.com/"

        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-thinking-exp-01-21",
            google_api_key=api_key,
            temperature=0,
            max_output_tokens=4096
        )

        prompt = f"""You are analyzing a YouTube video at URL: {url}

Question about the video: {question}

Based on what you know about this video (if it's a known video) or general knowledge,
provide a helpful analysis. If you cannot access the video directly, provide
reasonable information based on the video title/URL if it's recognizable.

Analysis:"""

        response = llm.invoke(prompt)
        return f"## YouTube Video Analysis (URL: {url})\n\n{response.content}"

    except Exception as e:
        # Tool errors are returned as text so the calling agent can react to them.
        print(f"Error in analyze_youtube_video: {type(e).__name__}: {e}")
        return f"Error analyzing video at {url}: {str(e)}"
86
+
87
@tool
def analyze_text_content(content: str, question: str) -> str:
    """
    Analyze text content using Gemini.

    Args:
        content: The text content to analyze
        question: Specific question about the content

    Returns:
        Analysis of the text based on the question.
    """
    # Any failure is reported back as a string rather than raised.
    try:
        google_key = os.environ.get("GOOGLE_API_KEY")
        if not google_key:
            return "Unable to perform analysis: Google API key not set."

        gemini = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-thinking-exp-01-21",
            google_api_key=google_key,
            temperature=0,
            max_output_tokens=4096,
        )

        # Only the first 8000 characters of the content are sent to the model.
        excerpt = content[:8000]
        analysis_prompt = f"""Analyze the following content and answer the question.

Content: {excerpt}

Question: {question}

Provide a concise, accurate answer based ONLY on the content above.
If the content doesn't contain the answer, say "Information not found in the provided content."

Answer:"""

        return gemini.invoke(analysis_prompt).content

    except Exception as err:
        return f"Error analyzing text: {err}"
127
+
128
@tool
def direct_reasoning(question: str, context: str = "") -> str:
    """
    Use Gemini's reasoning capabilities to answer a question.

    Args:
        question: The question to answer
        context: Optional context to help answer

    Returns:
        The reasoned answer
    """
    # Any failure is reported back as an "Error: ..." string rather than raised.
    try:
        google_key = os.environ.get("GOOGLE_API_KEY")
        if not google_key:
            return "Google API key not set."

        gemini = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-thinking-exp-01-21",
            google_api_key=google_key,
            temperature=0,
            max_output_tokens=4096,
        )

        reasoning_prompt = f"""Answer the following question with ONLY the exact answer, nothing else.
No explanations, no "FINAL ANSWER", just the answer.

{context}
Question: {question}

Answer:"""

        reply = gemini.invoke(reasoning_prompt)
        return reply.content.strip()
    except Exception as err:
        return f"Error: {err}"
164
+
165
+ # --- Agent State ---
166
class TaskState(TypedDict):
    # State dictionary passed between the nodes of the plan-and-execute graph.

    # --- task description ---
    task_id: str                      # task identifier; also used to build the file download URL
    question: str                     # original question text
    file_name: Optional[str]          # name of an attached file, if any
    api_url: str                      # base URL of the scoring API

    # --- downloaded attachment ---
    file_path: Optional[str]          # local path of the downloaded file ("" when the download failed)
    temp_dir: Optional[str]           # per-task scratch directory

    # --- planning / execution bookkeeping ---
    plan: List[str]                   # steps still to be executed
    past_steps: Annotated[List[Tuple[str, str]], operator.add]  # (task, thought) pairs; operator.add is the merge function
    response: str                     # final answer; empty until the replanner responds
    messages: Annotated[list[BaseMessage], add_messages]        # conversation history, merged via add_messages
    current_task: str                 # step currently being executed
178
+
179
+ # --- Search Tool Setup ---
180
def setup_tavily_search():
    """Build the Tavily web-search tool (up to 10 results per query).

    Raises:
        ValueError: if the ``TAVILY_API_KEY`` environment variable is unset or empty.
        Exception: any error raised while constructing ``TavilySearch`` is
            logged and re-raised unchanged.
    """
    try:
        if not os.environ.get("TAVILY_API_KEY"):
            raise ValueError("Tavily API key not found. Set TAVILY_API_KEY environment variable.")
        print("Using Tavily for web search")
        search_tool = TavilySearch(max_results=10)
    except Exception as setup_error:
        # Log for the console, then let the caller see the original exception.
        print(f"Error setting up Tavily: {setup_error}")
        raise
    return search_tool
191
+
192
+ # --- LLM Initialization with Gemini ---
193
def get_llm():
    """Create a Gemini chat model configured for deterministic output.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance (temperature 0, 4096 output tokens).

    Raises:
        ValueError: if the ``GOOGLE_API_KEY`` environment variable is unset or empty.
    """
    google_key = os.environ.get("GOOGLE_API_KEY")
    if google_key:
        return ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-thinking-exp-01-21",
            google_api_key=google_key,
            temperature=0,
            max_output_tokens=4096,
        )
    raise ValueError("GOOGLE_API_KEY environment variable not set. Get it from https://aistudio.google.com/")
204
+
205
# Shared model instance. This runs at import time, so get_llm() raising
# ValueError (missing GOOGLE_API_KEY) aborts the import of this module.
llm = get_llm()

# --- Tool Definitions ---
# setup_tavily_search() likewise raises at import time when TAVILY_API_KEY is missing.
web_search = setup_tavily_search()
wikipedia_api = WikipediaAPIWrapper(top_k_results=8, use_https=True)
wikipedia_search = WikipediaQueryRun(api_wrapper=wikipedia_api)

# Everything the executor LLM may call: the three Gemini-backed helpers
# followed by the two search tools.
tools = [
    analyze_youtube_video,
    analyze_text_content,
    direct_reasoning,
    web_search,
    wikipedia_search,
]

# Graph node that runs the tool calls requested by the executor LLM.
tool_node = ToolNode(tools)
221
+
222
+ # --- Pydantic Models for Planner/Replanner ---
223
class Plan(BaseModel):
    """Plan to follow in future"""
    # Structured output schema of the planner; also one of the two actions
    # the replanner may choose.
    thought: str = Field(
        description="The reasoning process behind generating this plan.",
    )
    steps: List[str] = Field(
        description="Different steps to follow, in sorted order.",
    )
227
+
228
class Response(BaseModel):
    """Response to user."""
    # Final-answer action: chosen by the replanner when the objective is met.
    response: str
231
+
232
class Act(BaseModel):
    """Action to perform."""
    # Structured output schema of the replanner: a thought plus either a
    # final Response or a revised Plan.
    thought: str = Field(
        description="The reasoning process behind choosing this action (Plan or Response).",
    )
    action: Union[Response, Plan] = Field(
        description="Action to perform. Response for final answer, Plan for more steps.",
    )
236
+
237
+ # --- Planner Prompt Setup ---
238
def get_tools_description() -> str:
    """Generate a formatted string describing all available tools.

    Each entry of the module-level ``tools`` list becomes one bullet of the
    form ``- `name`: summary``, where the summary is the first non-blank line
    of the tool's ``description`` (falling back to its ``__doc__``).

    Returns:
        The bullets joined with newlines; an empty string when ``tools`` is empty.
    """
    tool_descriptions = []
    # "available_tool" avoids shadowing the imported ``tool`` decorator.
    for available_tool in tools:
        name = getattr(available_tool, "name", str(available_tool))
        description = getattr(
            available_tool,
            "description",
            getattr(available_tool, "__doc__", "No description available"),
        )
        # Take the first non-blank line: a description that begins with a
        # newline (typical for triple-quoted docstrings) previously produced
        # an empty summary.
        summary = next(
            (line.strip() for line in (description or "").splitlines() if line.strip()),
            "No description available",
        )
        tool_descriptions.append(f"- `{name}`: {summary}")
    return "\n".join(tool_descriptions)
247
+
248
tools_desc = get_tools_description()

# The tool list is spliced into strings that are subsequently parsed as prompt
# templates, so literal braces inside a tool description must be doubled or
# they would be read as template variables.
_escaped_tools_desc = tools_desc.replace("{", "{{").replace("}", "}}")

# Planner: system instructions followed by the initial user message(s),
# producing a structured ``Plan``.
planner_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            f"""For the given objective, devise a simple step-by-step plan.
Also provide a detailed thought process explaining how you arrived at the plan.
**Plan Requirements:**
* **Simplicity:** Keep the plan as straightforward as possible.
* **Task Types:** Each step must be EITHER:
* A task requiring a specific tool from the available list.
* A reasoning step for the LLM to perform internally (e.g., summarizing information, comparing results).
* **Tool Usage:** If a step uses a tool, clearly state the tool name and what it should do.
* **Conciseness:** Avoid superfluous steps. The result of the final step should be the final answer.
**Available Tools:**
{_escaped_tools_desc}
Output your thought process and the plan steps.
""",
        ),
        ("placeholder", "{initial_user_message}"),
    ]
)

planner = planner_prompt | llm.with_structured_output(Plan)

# --- Replanner Prompt Setup ---
# Doubled braces ({{question}} etc.) survive the f-string as single-brace
# template variables that are filled in by replan_step.
replanner_prompt = ChatPromptTemplate.from_template(
    f"""You are a replanner. Your goal is to refine the plan to achieve the objective, or decide if the objective is met.
**Objective:**
{{question}}
**Original Plan (remaining steps):**
{{plan_str}}
**History (Executed Steps and Thoughts):**
{{past_steps_str}}
**Most Recent Step Executed:** '{{current_task}}'
**Direct Result of Last Step:**
{{latest_result}}
**Your Task:**
Analyze the **History (Executed Steps and Thoughts)** and the **Direct Result of Last Step** carefully.
* If the last step successfully moved towards the objective, continue the plan or refine it.
* If the last step failed, resulted in an error, or the **History** suggests the current approach is not working, you MUST revise the plan to try a different approach.
Based on this analysis, decide the next course of action (Respond or Revise Plan).
**Action Options:**
1. **Respond (Response action):** If the objective is met and you have the final answer, provide it.
2. **Revise Plan (Plan action):** If more steps are needed, provide a new, simple plan containing only the remaining steps.
**Available Tools:**
{_escaped_tools_desc}
Output your thought process and the chosen action (Plan or Response).
"""
)

replanner = replanner_prompt | llm.with_structured_output(Act)
301
+
302
+ # --- Agent Node Functions ---
303
def plan_step(state: TaskState):
    """Ask the planner for the initial plan.

    The current ``messages`` (the question, plus any file note) are passed as
    the planner's ``initial_user_message``. Returns a state update holding the
    planned steps and an empty ``messages`` update.
    """
    initial_plan = planner.invoke({"initial_user_message": state["messages"]})
    return {"plan": initial_plan.steps, "messages": []}
310
+
311
def prepare_next_step(state: TaskState):
    """Pop the next plan step and queue an instruction message for the executor.

    Returns a state update with the remaining plan, the step that is now
    current (``""`` when the plan is empty), and the message list extended by
    a ``HumanMessage`` describing that step.
    """
    pending_steps = state["plan"]
    user_question = state["question"]

    if pending_steps:
        next_task, leftover_steps = pending_steps[0], pending_steps[1:]
    else:
        next_task, leftover_steps = "", []

    instruction = HumanMessage(
        content=f"""Original User Question: {user_question}
Current Task: {next_task}
Based *only* on the 'Current Task' description above, decide if a tool needs to be called.
If you call an analysis tool, pass the necessary arguments.
If no tool is needed for the Current Task, explain the reasoning or result based on the task description.
"""
    )

    return {
        "plan": leftover_steps,
        "current_task": next_task,
        "messages": state.get("messages", []) + [instruction],
    }
333
+
334
def executor_llm_call(state: TaskState):
    """Run the tool-enabled LLM on the conversation so far.

    The model's reply (which may contain tool calls) is returned as a
    single-element ``messages`` update.
    """
    reply = llm.bind_tools(tools).invoke(state["messages"])
    return {"messages": [reply]}
339
+
340
def replan_step(state: TaskState):
    """Decide, after a step has run, whether to answer or to continue planning.

    The replanner receives the question, the remaining plan, the history of
    (step, thought) pairs and the content of the most recent message. If it
    chooses a ``Response``, a second LLM call reduces that response to the bare
    final answer; otherwise the revised plan steps are returned.
    """
    finished_task = state["current_task"]
    history = state["messages"]

    # Result of the step just executed = content of the newest message.
    if not history:
        latest_result = "(No message found for task result)"
    else:
        newest = history[-1]
        if isinstance(newest, (AIMessage, ToolMessage)):
            latest_result = newest.content
        else:
            latest_result = str(newest)

    history_lines = [
        f"Step: {task}\nThought: {thought}"
        for task, thought in state.get("past_steps", [])
    ]
    plan_lines = [
        f"{number}. {step}"
        for number, step in enumerate(state.get("plan", []), start=1)
    ]

    decision = replanner.invoke(
        {
            "question": state["question"],
            "plan_str": "\n".join(plan_lines),
            "past_steps_str": "\n".join(history_lines),
            "current_task": finished_task,
            "latest_result": latest_result,
        }
    )

    step_record = [(finished_task, decision.thought)]

    # Plan action: carry on with the revised steps.
    if not isinstance(decision.action, Response):
        return {
            "plan": decision.action.steps,
            "past_steps": step_record,
            "messages": state["messages"],
            "current_task": "",
        }

    # Response action: boil the replanner's response down to the bare answer.
    print(f"Replanner provided a final response: {decision.action.response}")
    final_answer_prompt = f"""The user's original question was: {state['question']}
The result determined by the plan is: {decision.action.response}
Based on this result, output ONLY the final formatted answer itself, and nothing else.
Keep the answer concise and exact."""

    answer_model = get_llm()
    final_answer = answer_model.invoke(final_answer_prompt).content.strip()

    return {
        "response": final_answer,
        "past_steps": step_record,
        "messages": [],
        "current_task": "",
    }
397
+
398
+ # --- Conditional Routing Functions ---
399
def route_after_executor_call(state: TaskState) -> Literal["tool_node", "replan_step"]:
    """Pick the node that follows the executor LLM call.

    Returns ``"tool_node"`` when the newest message is an ``AIMessage``
    carrying tool calls, and ``"replan_step"`` in every other case (including
    an empty message list).
    """
    history = state["messages"]
    newest = history[-1] if history else None
    wants_tool = isinstance(newest, AIMessage) and bool(newest.tool_calls)
    return "tool_node" if wants_tool else "replan_step"
407
+
408
def route_after_replan(state: TaskState) -> Literal["prepare_next_step", END]:
    """Pick the node that follows the replanner.

    A non-empty ``response`` ends the run; otherwise a non-empty ``plan``
    continues with ``"prepare_next_step"``. If neither is present a warning is
    printed and the run ends.
    """
    if state.get("response"):
        return END
    if state.get("plan"):
        return "prepare_next_step"
    print("Warning: Replanner finished without response or new plan.")
    return END
417
+
418
+ # --- File Handling Functions ---
419
def download_file(task_id: str, file_name: str, api_url: str = DEFAULT_API_URL) -> str:
    """Download the file attached to a task into the task's temp directory.

    Args:
        task_id: Task identifier; the file is fetched from ``{api_url}/files/{task_id}``.
        file_name: Name to store the file under. Only its final path component
            is used, so a remote-supplied name cannot escape the temp directory.
        api_url: Base URL of the scoring API.

    Returns:
        The local path of the downloaded file, or ``""`` on any failure
        (invalid file name, HTTP error, timeout, write error).
    """
    temp_dir = get_task_temp_dir(task_id)
    file_url = f"{api_url}/files/{task_id}"

    # file_name comes from the remote API: strip directory components so that
    # names such as "../../x" cannot write outside temp_dir.
    safe_name = os.path.basename(file_name or "")
    if safe_name in ("", ".", ".."):
        print(f"Error downloading file: invalid file name {file_name!r}")
        return ""
    file_path = os.path.join(temp_dir, safe_name)

    try:
        # A timeout prevents a stalled server from hanging the whole run; the
        # context manager releases the streamed connection.
        with requests.get(file_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        print(f"File downloaded successfully to {file_path}")
        return file_path
    except Exception as e:
        # Best-effort: the caller treats "" as "download failed".
        print(f"Error downloading file: {str(e)}")
        return ""
436
+
437
def process_file(state: TaskState):
    """Entry node for tasks with an attachment: download it and seed the state.

    When both ``task_id`` and ``file_name`` are present the file is downloaded
    and a note about the outcome (local path or failure) is appended to the
    question text used as the first message. All bookkeeping fields are reset.
    """
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")
    api_url = state.get("api_url", DEFAULT_API_URL)
    question = state.get("question", "")

    first_message_text = question
    file_updates = {}

    if task_id and file_name:
        scratch_dir = get_task_temp_dir(task_id)
        local_path = download_file(task_id, file_name, api_url)
        file_updates = {"file_path": local_path, "temp_dir": scratch_dir}
        if local_path:
            first_message_text += f"\n\n(Note: File downloaded to: {local_path})"
        else:
            first_message_text += f"\n\n(Note: Failed to download file '{file_name}')"

    seeded_state = {
        "question": question,
        "task_id": task_id,
        "file_name": file_name,
        "api_url": api_url,
    }
    seeded_state.update(file_updates)
    seeded_state.update(
        {
            "messages": [HumanMessage(content=first_message_text)],
            "plan": [],
            "past_steps": [],
            "response": "",
            "current_task": "",
        }
    )
    return seeded_state
471
+
472
def process_input(state: TaskState) -> TaskState:
    """Entry node for tasks without an attachment: seed a fresh state.

    The question becomes the first message, every file-related field is set to
    ``None`` and the planning bookkeeping fields are reset.
    """
    question_text = state.get("question", "")
    first_message = HumanMessage(content=question_text)
    return dict(
        question=question_text,
        task_id=state.get("task_id", ""),
        file_name=None,
        api_url=state.get("api_url", DEFAULT_API_URL),
        file_path=None,
        temp_dir=None,
        messages=[first_message],
        plan=[],
        past_steps=[],
        response="",
        current_task="",
    )
488
+
489
def should_process_file(state: TaskState) -> Literal["process_file", "process_input"]:
    """Choose the graph entry node.

    Returns ``"process_file"`` only when both ``task_id`` and ``file_name`` are
    present and truthy; otherwise ``"process_input"``.
    """
    has_attachment = bool(state.get("task_id", "")) and bool(state.get("file_name", ""))
    return "process_file" if has_attachment else "process_input"
496
+
497
+ # --- Build Graph ---
498
def create_plan_execute_task_flow():
    """Assemble and compile the plan-and-execute graph.

    Flow: entry (process_file | process_input) -> planner -> prepare_next_step
    -> executor_llm_call -> (tool_node ->) replan_step -> prepare_next_step or END.

    Returns:
        A ``(compiled_app, graph)`` tuple: the compiled runnable and the
        ``StateGraph`` it was built from.
    """
    workflow = StateGraph(TaskState)

    # Nodes, registered in the same order as they appear in the flow.
    node_table = (
        ("process_input", process_input),
        ("process_file", process_file),
        ("planner", plan_step),
        ("prepare_next_step", prepare_next_step),
        ("executor_llm_call", executor_llm_call),
        ("tool_node", tool_node),
        ("replan_step", replan_step),
    )
    for node_name, node_action in node_table:
        workflow.add_node(node_name, node_action)

    # Entry point depends on whether the task carries a file.
    workflow.set_conditional_entry_point(
        should_process_file,
        {"process_file": "process_file", "process_input": "process_input"},
    )

    # Linear part of the flow.
    for source, target in (
        ("process_input", "planner"),
        ("process_file", "planner"),
        ("planner", "prepare_next_step"),
        ("prepare_next_step", "executor_llm_call"),
    ):
        workflow.add_edge(source, target)

    # Executor either requests a tool or goes straight to replanning.
    workflow.add_conditional_edges(
        "executor_llm_call",
        route_after_executor_call,
        {"tool_node": "tool_node", "replan_step": "replan_step"},
    )
    workflow.add_edge("tool_node", "replan_step")

    # Replanner either loops back for the next step or finishes.
    workflow.add_conditional_edges(
        "replan_step",
        route_after_replan,
        {"prepare_next_step": "prepare_next_step", END: END},
    )

    compiled_app = workflow.compile()
    print("Plan-and-execute task graph compiled.")
    return compiled_app, workflow
535
+
536
+ # --- LangGraph Agent Wrapper ---
537
class LangGraphAgent:
    """Callable wrapper around the compiled plan-and-execute graph."""

    def __init__(self):
        """Compile the task graph and keep the runnable on ``self.app_executor``."""
        print("LangGraphAgent initialized with Plan-and-Execute flow.")
        self.app_executor, _ = create_plan_execute_task_flow()

    def __call__(self, item: dict) -> str:
        """Answer one task item.

        Args:
            item: Task dictionary; reads the ``task_id``, ``question`` and
                optional ``file_name`` keys.

        Returns:
            The agent's answer, or a string starting with ``"Error:"`` when the
            question is missing, the graph raises, or no answer was produced.
        """
        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name", None)

        # Validate before slicing: a missing question is None, and None[:50]
        # would raise TypeError before the guard was ever reached.
        if not question:
            return "Error: Missing question in task item."

        print(f"Agent received task {task_id}: {question[:50]}... (File: {file_name})")

        try:
            initial_state = {
                "task_id": task_id,
                "question": question,
                "file_name": file_name if file_name else None,
                "api_url": DEFAULT_API_URL
            }

            print(f"Invoking agent for task {task_id}...")
            result = self.app_executor.invoke(initial_state)

            # The entry nodes initialise "response" to "", so fall back on any
            # empty value, not only on a missing key.
            answer = result.get("response") or "Error: No final response generated."

            if not isinstance(answer, str):
                answer = str(answer)

            print(f"Agent returning answer for task {task_id}: {answer[:50]}...")
            return answer

        except Exception as e:
            # Report the failure as the answer so one bad task does not stop the run.
            print(f"Error processing task {task_id}: {e}")
            import traceback
            traceback.print_exc()
            return f"Error: {str(e)}"
 
 
 
 
576
 
577
  # --- Gradio Interface Functions ---
578
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
589
  questions_url = f"{api_url}/questions"
590
  submit_url = f"{api_url}/submit"
591
 
 
592
  try:
593
+ agent = LangGraphAgent()
594
  except Exception as e:
595
+ print(f"Error instantiating agent: {e}")
596
  return f"Error initializing agent: {e}", None
597
 
598
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
602
  response = requests.get(questions_url, timeout=15)
603
  response.raise_for_status()
604
  questions_data = response.json()
605
+ if not questions_data:
606
+ return "Fetched questions list is empty.", None
607
+ print(f"Fetched {len(questions_data)} questions.")
608
  except Exception as e:
609
  return f"Error fetching questions: {e}", None
610
 
611
+ # Run agent on questions
612
  results_log = []
613
  answers_payload = []
614
+ print(f"Running agent on {len(questions_data)} questions...")
615
 
616
+ for item in questions_data:
617
  task_id = item.get("task_id")
618
+ question_text = item.get("question")
619
+ if not task_id or question_text is None:
620
+ continue
 
621
  try:
622
+ submitted_answer = agent(item)
623
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
624
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
625
  except Exception as e:
626
+ print(f"Error on task {task_id}: {e}")
627
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
628
 
629
  if not answers_payload:
630
+ return "No answers produced.", pd.DataFrame(results_log)
631
 
632
  # Submit answers
633
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
634
 
635
  try:
636
  response = requests.post(submit_url, json=submission_data, timeout=120)
637
  response.raise_for_status()
638
+ result_data = response.json()
639
+ final_status = (
 
640
  f"✅ Submission Successful!\n"
641
+ f"User: {result_data.get('username')}\n"
642
+ f"Score: {result_data.get('score', 'N/A')}% "
643
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)"
644
  )
645
+ return final_status, pd.DataFrame(results_log)
646
  except Exception as e:
647
  return f"Submission failed: {e}", pd.DataFrame(results_log)
648
 
649
  # --- Gradio Interface ---
650
  with gr.Blocks() as demo:
651
+ gr.Markdown("# 🦾 GAIA Agent Evaluator - Gemini Edition")
652
+ gr.Markdown(
653
+ """
654
+ **Instructions:**
655
+ 1. Login to Hugging Face
656
+ 2. Click 'Run Evaluation & Submit'
657
+ 3. Wait for the agent to process all questions
658
+
659
+ **Model:** Gemini 2.0 Flash Thinking (gratuit, excellent pour le raisonnement)
660
+ """
661
+ )
662
+
663
  gr.LoginButton()
664
  run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
665
  status_output = gr.Textbox(label="Status", lines=5, interactive=False)
666
  results_table = gr.DataFrame(label="Results", wrap=True)
667
+
668
  run_button.click(
669
  fn=run_and_submit_all,
670
  outputs=[status_output, results_table]