Isateles committed on
Commit
591a8d1
·
1 Parent(s): a7b80a9

Update GAIA agent: changed to ReAct architecture

Browse files
Files changed (2) hide show
  1. app.py +63 -80
  2. tools.py +4 -10
app.py CHANGED
@@ -71,32 +71,39 @@ def setup_llm():
71
 
72
 
73
  def extract_final_answer(response_text: str) -> str:
74
- """Extract answer aligned with GAIA scoring rules - FIXED VERSION"""
75
 
76
- # First, remove any "assistant:" prefix that might have been added
 
 
 
 
 
77
  response_text = re.sub(r'^assistant:\s*', '', response_text, flags=re.IGNORECASE)
78
 
79
  # Look for FINAL ANSWER pattern
80
  match = re.search(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", response_text, re.IGNORECASE | re.DOTALL)
81
 
82
  if not match:
83
- logger.warning("No FINAL ANSWER found in response")
84
- return ""
85
-
86
- answer = match.group(1).strip()
 
 
 
 
 
 
 
 
 
 
87
 
88
- # CRITICAL: Stop processing if we hit "assistant:" or any reasoning text
89
  if 'assistant:' in answer:
90
  answer = answer.split('assistant:')[0].strip()
91
 
92
- # Remove any trailing explanatory text (usually starts with lowercase after answer)
93
- sentences = answer.split('.')
94
- if len(sentences) > 1:
95
- # Check if second sentence starts with lowercase (indicates explanation)
96
- first_sentence = sentences[0].strip()
97
- if first_sentence and (not sentences[1].strip() or sentences[1].strip()[0].islower()):
98
- answer = first_sentence
99
-
100
  # Clean for GAIA scoring
101
 
102
  # 1. Handle pure numbers
@@ -142,7 +149,7 @@ def extract_final_answer(response_text: str) -> str:
142
  return answer
143
 
144
  class GAIAAgent:
145
- """GAIA RAG Agent using LlamaIndex AgentWorkflow"""
146
 
147
  def __init__(self):
148
  logger.info("Initializing GAIA RAG Agent...")
@@ -161,14 +168,18 @@ class GAIAAgent:
161
  for tool in self.tools:
162
  logger.info(f" - {tool.metadata.name}: {tool.metadata.description}")
163
 
164
- # Create agent with GAIA prompt
165
- from llama_index.core.agent.workflow import AgentWorkflow
166
 
167
- self.agent = AgentWorkflow.from_tools_or_functions(
168
- tools_or_functions=self.tools,
169
  llm=self.llm,
 
170
  system_prompt=GAIA_SYSTEM_PROMPT,
171
- verbose=True
 
 
 
172
  )
173
 
174
  logger.info("GAIA RAG Agent ready!")
@@ -177,70 +188,42 @@ class GAIAAgent:
177
  """Process a question and return clean answer for course submission"""
178
  logger.info(f"Processing question: {question[:100]}...")
179
 
180
- import warnings
181
- warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*Event loop is closed.*")
182
-
183
  try:
184
- loop = asyncio.new_event_loop()
185
- asyncio.set_event_loop(loop)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- try:
188
- async def run_agent():
189
- try:
190
- handler = self.agent.run(user_msg=question)
191
-
192
- # Wait for the result
193
- result = await handler
194
-
195
- # Extract response text more carefully
196
- response_text = ""
197
-
198
- # Try different ways to get the response
199
- if hasattr(result, 'response'):
200
- if hasattr(result.response, 'message'):
201
- if hasattr(result.response.message, 'content'):
202
- response_text = result.response.message.content
203
- else:
204
- response_text = str(result.response.message)
205
- else:
206
- response_text = str(result.response)
207
- elif hasattr(result, 'content'):
208
- response_text = result.content
209
- elif hasattr(result, 'output'):
210
- response_text = result.output
211
- else:
212
- response_text = str(result)
213
-
214
- # Clean up any streaming artifacts
215
- response_text = re.sub(r'assistant:\s*', '', response_text, flags=re.IGNORECASE)
216
-
217
- return response_text
218
-
219
- except Exception as e:
220
- logger.error(f"Agent execution error: {e}")
221
- import traceback
222
- logger.error(traceback.format_exc())
223
- return "FINAL ANSWER: "
224
-
225
- response_text = loop.run_until_complete(
226
- asyncio.wait_for(run_agent(), timeout=60)
227
- )
228
-
229
- # Extract clean answer
230
- clean_answer = extract_final_answer(response_text)
231
-
232
- logger.info(f"Full response preview: {response_text[:200]}...")
233
- logger.info(f"Extracted answer: '{clean_answer}'")
234
-
235
- return clean_answer
236
-
237
- finally:
238
- loop.close()
239
-
240
  except Exception as e:
241
  logger.error(f"Error processing question: {e}")
 
 
242
  return ""
243
-
244
  def run_and_submit_all(profile: gr.OAuthProfile | None):
245
  """Run GAIA evaluation following course template structure"""
246
 
 
71
 
72
 
73
  def extract_final_answer(response_text: str) -> str:
74
+ """Extract answer aligned with GAIA scoring rules"""
75
 
76
+ # Remove any ReAct thinking patterns
77
+ response_text = re.sub(r'Thought:.*?\n', '', response_text, flags=re.DOTALL)
78
+ response_text = re.sub(r'Action:.*?\n', '', response_text, flags=re.DOTALL)
79
+ response_text = re.sub(r'Observation:.*?\n', '', response_text, flags=re.DOTALL)
80
+
81
+ # Remove assistant prefix
82
  response_text = re.sub(r'^assistant:\s*', '', response_text, flags=re.IGNORECASE)
83
 
84
  # Look for FINAL ANSWER pattern
85
  match = re.search(r"FINAL ANSWER:\s*(.+?)(?:\n|$)", response_text, re.IGNORECASE | re.DOTALL)
86
 
87
  if not match:
88
+ # Try to find answer at the end of response
89
+ lines = response_text.strip().split('\n')
90
+ if lines:
91
+ last_line = lines[-1].strip()
92
+ # If last line is short and doesn't look like reasoning
93
+ if last_line and len(last_line) < 50:
94
+ answer = last_line
95
+ else:
96
+ logger.warning("No FINAL ANSWER found")
97
+ return ""
98
+ else:
99
+ return ""
100
+ else:
101
+ answer = match.group(1).strip()
102
 
103
+ # Stop at any continuation
104
  if 'assistant:' in answer:
105
  answer = answer.split('assistant:')[0].strip()
106
 
 
 
 
 
 
 
 
 
107
  # Clean for GAIA scoring
108
 
109
  # 1. Handle pure numbers
 
149
  return answer
150
 
151
  class GAIAAgent:
152
+ """GAIA RAG Agent using ReActAgent for better compatibility"""
153
 
154
  def __init__(self):
155
  logger.info("Initializing GAIA RAG Agent...")
 
168
  for tool in self.tools:
169
  logger.info(f" - {tool.metadata.name}: {tool.metadata.description}")
170
 
171
+ # Create ReActAgent instead of AgentWorkflow
172
+ from llama_index.core.agent import ReActAgent
173
 
174
+ self.agent = ReActAgent.from_tools(
175
+ tools=self.tools,
176
  llm=self.llm,
177
+ verbose=True,
178
  system_prompt=GAIA_SYSTEM_PROMPT,
179
+ max_iterations=10,
180
+ # ReAct specific settings
181
+ react_chat_formatter=None, # Use default ReAct formatter
182
+ output_parser=None, # Use default output parser
183
  )
184
 
185
  logger.info("GAIA RAG Agent ready!")
 
188
  """Process a question and return clean answer for course submission"""
189
  logger.info(f"Processing question: {question[:100]}...")
190
 
 
 
 
191
  try:
192
+ # Much simpler with ReActAgent - just call chat
193
+ response = self.agent.chat(question)
194
+
195
+ # Get the response text
196
+ response_text = str(response)
197
+
198
+ # Clean any artifacts
199
+ response_text = re.sub(r'assistant:\s*', '', response_text, flags=re.IGNORECASE)
200
+
201
+ # Extract clean answer
202
+ clean_answer = extract_final_answer(response_text)
203
+
204
+ if not clean_answer:
205
+ # Fallback: try to extract from response directly
206
+ logger.warning("Primary extraction failed, trying fallback")
207
+ # Look for short answers at the end
208
+ lines = response_text.strip().split('\n')
209
+ for line in reversed(lines):
210
+ line = line.strip()
211
+ if line and len(line) < 100 and not line.startswith(('Thought:', 'Action:', 'Observation:')):
212
+ clean_answer = extract_final_answer(f"FINAL ANSWER: {line}")
213
+ if clean_answer:
214
+ break
215
+
216
+ logger.info(f"Full response: {response_text[:200]}...")
217
+ logger.info(f"Extracted answer: '{clean_answer}'")
218
+
219
+ return clean_answer
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  except Exception as e:
222
  logger.error(f"Error processing question: {e}")
223
+ import traceback
224
+ logger.error(traceback.format_exc())
225
  return ""
226
+
227
  def run_and_submit_all(profile: gr.OAuthProfile | None):
228
  """Run GAIA evaluation following course template structure"""
229
 
tools.py CHANGED
@@ -556,28 +556,22 @@ def get_gaia_tools(llm=None):
556
  FunctionTool.from_defaults(
557
  fn=search_web,
558
  name="web_search",
559
- description="""Use ONLY for:
560
- 1. Current events after January 2025
561
- 2. Real-time data (stock prices, weather, sports scores)
562
- 3. When question explicitly asks to "search" or "look up"
563
- 4. To verify facts you're uncertain about
564
- Do NOT use for general knowledge, historical facts, or math."""
565
  ),
566
-
567
  FunctionTool.from_defaults(
568
  fn=calculate,
569
  name="calculator",
570
- description="ALWAYS use for ANY math calculation, including simple arithmetic like 2+2. Required for all numbers."
571
  ),
572
  FunctionTool.from_defaults(
573
  fn=analyze_file,
574
  name="file_analyzer",
575
- description="Analyze file contents, especially CSV files. Returns statistics and data insights."
576
  ),
577
  FunctionTool.from_defaults(
578
  fn=get_weather,
579
  name="weather",
580
- description="Get current weather information for any location. Use when asked about weather conditions."
581
  )
582
  ]
583
 
 
556
  FunctionTool.from_defaults(
557
  fn=search_web,
558
  name="web_search",
559
+ description="""Search the web for information. Use when you need current information, real-time data, or to verify facts. Input should be a search query string."""
 
 
 
 
 
560
  ),
 
561
  FunctionTool.from_defaults(
562
  fn=calculate,
563
  name="calculator",
564
+ description="""Perform mathematical calculations. Use for any math problem. Input should be the mathematical expression to evaluate."""
565
  ),
566
  FunctionTool.from_defaults(
567
  fn=analyze_file,
568
  name="file_analyzer",
569
+ description="""Analyze file contents, especially CSV files. Input should be the file content and file type."""
570
  ),
571
  FunctionTool.from_defaults(
572
  fn=get_weather,
573
  name="weather",
574
+ description="""Get current weather for a location. Input should be the location name."""
575
  )
576
  ]
577