jebaponselvasingh committed on
Commit
e0ff305
·
1 Parent(s): e95a92e

Add application file

Browse files
Files changed (2) hide show
  1. agent_enhanced.py +202 -33
  2. app.py +1 -1
agent_enhanced.py CHANGED
@@ -313,34 +313,108 @@ Provide PRECISE, EXACT answers. The benchmark uses EXACT STRING MATCHING, so you
313
  - ✅ CORRECT: "Paris"
314
  - ❌ WRONG: "The answer is Paris because..."
315
 
316
- ## Problem-Solving Strategy
317
- 1. **Understand**: Read the question carefully. What exactly is being asked? Note any specific format requirements.
318
- 2. **Check for File**: If a file is mentioned or available, ALWAYS read it FIRST - the answer is likely there.
319
- 3. **Plan**: What information do I need? Which tools should I use?
320
- 4. **Execute**: Use tools systematically. Verify information from multiple sources when possible.
321
- 5. **Verify**: Double-check your answer format. Does it match the question's requirements? Is spelling correct?
322
- 6. **Respond**: Give ONLY the final answer, no prefixes, no explanations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  ## Available Tools
325
  - `read_file`: Read PDFs, spreadsheets, text files - USE THIS FIRST if a file is available
326
- - `web_search`: Current information, recent events, facts
327
- - `wikipedia_search`: Historical facts, biographies, definitions
328
- - `python_executor`: Calculations, data processing, analysis
329
- - `calculator`: Quick mathematical calculations
330
-
331
- ## Tool Usage Priority
332
- 1. **If file available**: Read file FIRST before doing anything else
333
- 2. **For calculations**: Use python_executor for complex math, calculator for simple expressions
334
- 3. **For facts**: Use wikipedia_search for established facts, web_search for current/recent information
335
- 4. **Cross-reference**: When possible, verify important facts from multiple sources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  ## Critical Reminders
338
  - NEVER include "FINAL ANSWER:" or any prefix in your response
339
  - NEVER add explanations or context to your final answer
340
  - ALWAYS verify spelling, capitalization, and formatting
341
- - ALWAYS read files first if they are available
 
 
342
  - If uncertain about format, look for clues in the question itself
343
  - Never guess - use tools to find accurate information
 
 
 
 
 
 
 
 
 
344
 
345
  Remember: Your final message must contain ONLY the answer, nothing else. The scoring system uses exact string matching."""
346
 
@@ -354,7 +428,7 @@ class GAIAAgent:
354
  model_name: str = "gpt-4o",
355
  api_key: str = None,
356
  temperature: float = 0,
357
- max_iterations: int = 15
358
  ):
359
  """
360
  Initialize the GAIA agent.
@@ -407,12 +481,15 @@ class GAIAAgent:
407
  messages = state["messages"]
408
  iteration = state.get("iteration_count", 0)
409
 
410
- # Add iteration warnings earlier to give agent more time to finish
411
- if iteration >= self.max_iterations - 3:
412
- warning_msg = "WARNING: Approaching iteration limit. Please provide your final answer now. Remember: just the answer, no prefix."
413
  messages = list(messages) + [SystemMessage(content=warning_msg)]
414
  elif iteration >= self.max_iterations - 5:
415
- reminder_msg = "Reminder: When you're ready to answer, provide ONLY the final answer with no prefix like 'FINAL ANSWER:' or 'The answer is:'"
 
 
 
416
  messages = list(messages) + [SystemMessage(content=reminder_msg)]
417
 
418
  try:
@@ -447,15 +524,46 @@ class GAIAAgent:
447
 
448
  def _extract_answer_node(self, state: AgentState) -> dict:
449
  """Extract and clean the final answer."""
450
- last_message = state["messages"][-1]
 
 
 
 
451
  content = last_message.content if hasattr(last_message, "content") else str(last_message)
452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  answer = self._clean_answer(content)
454
 
455
  return {"final_answer": answer}
456
 
457
  def _clean_answer(self, raw_answer: str) -> str:
458
  """Clean and format the final answer for exact matching."""
 
 
 
459
  answer = raw_answer.strip()
460
 
461
  # Remove common prefixes (case-insensitive, with variations)
@@ -468,33 +576,78 @@ class GAIAAgent:
468
  "solution:", "solution", "solution is:",
469
  "the solution is:", "the solution is",
470
  "it is", "it's", "that is", "that's",
 
 
 
 
471
  ]
472
 
473
  answer_lower = answer.lower()
474
  for prefix in prefixes:
475
  if answer_lower.startswith(prefix):
476
  answer = answer[len(prefix):].strip()
477
- # Remove any leading colon or dash
478
- answer = answer.lstrip(':').lstrip('-').strip()
479
  answer_lower = answer.lower()
480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  # Remove quotes if they wrap the entire answer
482
  if (answer.startswith('"') and answer.endswith('"')) or \
483
  (answer.startswith("'") and answer.endswith("'")):
484
  answer = answer[1:-1].strip()
485
 
486
  # Remove trailing periods, commas, or semicolons for single-word/number answers
 
487
  if answer and ' ' not in answer:
488
- answer = answer.rstrip('.,;:')
 
 
489
 
490
  # Remove leading/trailing whitespace and normalize internal whitespace
491
- answer = ' '.join(answer.split())
 
 
 
 
 
492
 
493
  # Remove markdown formatting if present
494
  if answer.startswith('**') and answer.endswith('**'):
495
- answer = answer[2:-2]
496
- if answer.startswith('*') and answer.endswith('*'):
497
- answer = answer[1:-1]
 
 
 
 
 
 
 
 
 
 
498
 
499
  return answer.strip()
500
 
@@ -513,8 +666,24 @@ class GAIAAgent:
513
  # Prepare the user message with file priority
514
  user_content = question
515
  if file_path and os.path.exists(file_path):
516
- # Strongly emphasize reading the file first
517
- user_content = f"[IMPORTANT: A file is available at {file_path}]\n\nYou MUST read this file FIRST using the read_file tool before attempting to answer. The answer is very likely contained in this file.\n\nQuestion: {question}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  # Initialize state
520
  initial_state: AgentState = {
 
313
  - ✅ CORRECT: "Paris"
314
  - ❌ WRONG: "The answer is Paris because..."
315
 
316
+ ## Detailed Problem-Solving Strategy
317
+
318
+ ### Step 1: Analyze the Question
319
+ - Read the question word-by-word. What exactly is being asked?
320
+ - Identify keywords: "what", "who", "when", "where", "how many", "calculate", "find"
321
+ - Note any format requirements or constraints mentioned in the question
322
+ - Check if the question references specific data, files, or time periods
323
+
324
+ ### Step 2: File Priority (CRITICAL)
325
+ - If a file is mentioned or available, you MUST read it FIRST before any other action
326
+ - Files often contain the exact answer or the data needed to calculate it
327
+ - After reading the file, carefully search through ALL content - don't miss details
328
+ - For Excel/CSV files, examine ALL sheets and ALL columns
329
+ - For PDFs, read ALL pages - answers can be anywhere in the document
330
+
331
+ ### Step 3: Plan Your Approach
332
+ - Based on the question type, decide which tools you need:
333
+ - **Data extraction from file**: read_file (then possibly python_executor for analysis)
334
+ - **Mathematical calculations**: python_executor or calculator
335
+ - **Historical/factual information**: wikipedia_search first, then web_search if needed
336
+ - **Current/recent information**: web_search
337
+ - **Complex data analysis**: python_executor with pandas/numpy
338
+ - Create a step-by-step plan before executing
339
+
340
+ ### Step 4: Execute Systematically
341
+ - Use ONE tool at a time, wait for results
342
+ - For file-based questions: read file → extract relevant data → calculate/analyze → verify
343
+ - For fact-based questions: search → verify from multiple sources if possible → extract exact answer
344
+ - For calculation questions: gather inputs → perform calculation → double-check math
345
+ - If initial search doesn't yield results, try different query keywords
346
+
347
+ ### Step 5: Verify and Cross-Check
348
+ - Verify your answer matches what was asked
349
+ - For names: double-check spelling, capitalization, punctuation
350
+ - For numbers: verify calculations, check units, ensure precision
351
+ - For dates: verify format matches question requirements
352
+ - If you found information from one source, try to verify with another if time permits
353
+ - For lists: ensure proper comma-separated format with NO spaces
354
+
355
+ ### Step 6: Format Correctly
356
+ - Remove ALL prefixes ("FINAL ANSWER:", "The answer is:", etc.)
357
+ - Remove ALL explanations and context
358
+ - Ensure exact formatting (spaces, commas, capitalization)
359
+ - Double-check: is this the EXACT format the question expects?
360
 
361
  ## Available Tools
362
  - `read_file`: Read PDFs, spreadsheets, text files - USE THIS FIRST if a file is available
363
+ - `web_search`: Current information, recent events, facts (use for recent/current info)
364
+ - `wikipedia_search`: Historical facts, biographies, definitions (use for established facts)
365
+ - `python_executor`: Calculations, data processing, analysis (use for complex calculations or data analysis)
366
+ - `calculator`: Quick mathematical calculations (use for simple arithmetic)
367
+
368
+ ## Tool Usage Guidelines
369
+
370
+ ### Reading Files (HIGHEST PRIORITY)
371
+ - ALWAYS read files FIRST if available
372
+ - For Excel files: check ALL sheets, read ALL relevant columns
373
+ - For PDFs: read ALL pages, search for keywords from the question
374
+ - For CSV files: examine ALL rows, look for patterns
375
+ - Extract numbers, names, dates EXACTLY as they appear
376
+
377
+ ### Web Search Strategy
378
+ - Use specific, targeted queries with key terms from the question
379
+ - If first search doesn't help, try rephrasing with different keywords
380
+ - Look for official sources, authoritative websites
381
+ - Extract exact values (numbers, names) - don't round or approximate
382
+
383
+ ### Wikipedia Search Strategy
384
+ - Use exact terms or names from the question
385
+ - Read the summary/intro carefully - it often contains the answer
386
+ - Check spelling, capitalization, dates exactly as shown
387
+ - For biographical questions, search for the person's name
388
+
389
+ ### Python Execution
390
+ - Use for calculations, data analysis, or processing file contents
391
+ - Be explicit with calculations - show your work in code
392
+ - Use appropriate precision - don't round unnecessarily
393
+ - Print the final result clearly
394
+
395
+ ### Calculator
396
+ - Use for simple arithmetic operations
397
+ - Preserve precision - use exact fractions if possible
398
+ - Format output correctly (integers as integers, decimals as needed)
399
 
400
  ## Critical Reminders
401
  - NEVER include "FINAL ANSWER:" or any prefix in your response
402
  - NEVER add explanations or context to your final answer
403
  - ALWAYS verify spelling, capitalization, and formatting
404
+ - ALWAYS read files first if they are available - don't skip this step
405
+ - For file-based questions, the answer is almost always in the file
406
+ - Extract exact values - don't approximate or round unless necessary
407
  - If uncertain about format, look for clues in the question itself
408
  - Never guess - use tools to find accurate information
409
+ - Use multiple tools if needed - don't stop after the first result if unsure
410
+ - Cross-reference important facts when possible
411
+
412
+ ## When You're Ready to Answer
413
+ - Review your final answer one more time
414
+ - Ensure it's formatted correctly (no prefixes, no explanations)
415
+ - Ensure spelling, capitalization, and punctuation are exact
416
+ - Ensure numbers are precise
417
+ - When satisfied, respond with ONLY the answer - nothing else
418
 
419
  Remember: Your final message must contain ONLY the answer, nothing else. The scoring system uses exact string matching."""
420
 
 
428
  model_name: str = "gpt-4o",
429
  api_key: str = None,
430
  temperature: float = 0,
431
+ max_iterations: int = 25
432
  ):
433
  """
434
  Initialize the GAIA agent.
 
481
  messages = state["messages"]
482
  iteration = state.get("iteration_count", 0)
483
 
484
+ # Add iteration warnings to guide the agent
485
+ if iteration >= self.max_iterations - 2:
486
+ warning_msg = "⚠️ CRITICAL: You have reached the iteration limit. You MUST provide your final answer NOW in your next response. Format: ONLY the answer itself, no prefixes like 'FINAL ANSWER:' or 'The answer is:' - just the answer."
487
  messages = list(messages) + [SystemMessage(content=warning_msg)]
488
  elif iteration >= self.max_iterations - 5:
489
+ warning_msg = "⚠️ WARNING: Approaching iteration limit. Start wrapping up and provide your final answer soon. Remember: just the answer, no prefix."
490
+ messages = list(messages) + [SystemMessage(content=warning_msg)]
491
+ elif iteration >= self.max_iterations - 8:
492
+ reminder_msg = "Reminder: When you're ready to answer, provide ONLY the final answer with no prefix like 'FINAL ANSWER:' or 'The answer is:'. Check your answer format carefully."
493
  messages = list(messages) + [SystemMessage(content=reminder_msg)]
494
 
495
  try:
 
524
 
525
  def _extract_answer_node(self, state: AgentState) -> dict:
526
  """Extract and clean the final answer."""
527
+ # Try to find the answer in the last few messages
528
+ messages = state["messages"]
529
+
530
+ # Look for answer in last message first
531
+ last_message = messages[-1]
532
  content = last_message.content if hasattr(last_message, "content") else str(last_message)
533
 
534
+ # If last message is empty or doesn't contain clear answer, check previous messages
535
+ if not content or len(content.strip()) < 3:
536
+ # Look backwards through messages for the last non-empty content
537
+ for msg in reversed(messages[:-1]):
538
+ msg_content = msg.content if hasattr(msg, "content") else str(msg)
539
+ if msg_content and len(msg_content.strip()) >= 3:
540
+ content = msg_content
541
+ break
542
+
543
+ # Also check if we have tool results that might contain the answer
544
+ # Look for tool results in recent messages
545
+ for msg in reversed(messages[-5:]): # Check last 5 messages
546
+ if hasattr(msg, "content") and msg.content:
547
+ # Sometimes answers are in tool responses
548
+ if "result" in msg.content.lower() or "answer" in msg.content.lower():
549
+ # Extract potential answer from tool response
550
+ lines = msg.content.split('\n')
551
+ for line in lines:
552
+ line_lower = line.lower()
553
+ if any(word in line_lower for word in ["the answer is", "result is", "found:", "value:", "equals"]):
554
+ # Try to extract just the answer part
555
+ content = line
556
+ break
557
+
558
  answer = self._clean_answer(content)
559
 
560
  return {"final_answer": answer}
561
 
562
  def _clean_answer(self, raw_answer: str) -> str:
563
  """Clean and format the final answer for exact matching."""
564
+ if not raw_answer:
565
+ return ""
566
+
567
  answer = raw_answer.strip()
568
 
569
  # Remove common prefixes (case-insensitive, with variations)
 
576
  "solution:", "solution", "solution is:",
577
  "the solution is:", "the solution is",
578
  "it is", "it's", "that is", "that's",
579
+ "the value is:", "the value is", "value is:",
580
+ "the result is:", "the result is",
581
+ "found:", "found", "equals:", "equals", "is:",
582
+ "according to the", "based on the", "from the",
583
  ]
584
 
585
  answer_lower = answer.lower()
586
  for prefix in prefixes:
587
  if answer_lower.startswith(prefix):
588
  answer = answer[len(prefix):].strip()
589
+ # Remove any leading colon, dash, or space
590
+ answer = answer.lstrip(':').lstrip('-').lstrip().strip()
591
  answer_lower = answer.lower()
592
 
593
+ # Remove explanations after the answer (look for common patterns)
594
+ # Split by common explanation starters
595
+ explanation_markers = [" because", " since", " as", " due to", " which", " that", " - ", " (", " [", "\n\n"]
596
+ for marker in explanation_markers:
597
+ if marker in answer:
598
+ # For some markers, split and take first part
599
+ if marker in [" - ", "\n\n"]:
600
+ answer = answer.split(marker)[0].strip()
601
+ # For parentheses/brackets, be more careful
602
+ elif marker in [" (", " ["]:
603
+ # Only remove if it looks like an explanation
604
+ idx = answer.find(marker)
605
+ if idx > 0 and idx < len(answer) - 3: # Not at start/end
606
+ # Check if it's likely an explanation (has words, not just numbers/dates)
607
+ rest = answer[idx+1:]
608
+ if any(char.isalpha() for char in rest[:20]): # Has letters in first 20 chars
609
+ answer = answer[:idx].strip()
610
+ else:
611
+ # For words like "because", split and take first part
612
+ parts = answer.split(marker, 1)
613
+ if len(parts) > 1:
614
+ answer = parts[0].strip()
615
+
616
  # Remove quotes if they wrap the entire answer
617
  if (answer.startswith('"') and answer.endswith('"')) or \
618
  (answer.startswith("'") and answer.endswith("'")):
619
  answer = answer[1:-1].strip()
620
 
621
  # Remove trailing periods, commas, or semicolons for single-word/number answers
622
+ # But preserve trailing punctuation for dates or other formatted answers
623
  if answer and ' ' not in answer:
624
+ # Don't remove trailing punctuation if it's part of a date format or URL
625
+ if not (answer.count('-') == 2 or answer.count('/') == 2 or '://' in answer):
626
+ answer = answer.rstrip('.,;:')
627
 
628
  # Remove leading/trailing whitespace and normalize internal whitespace
629
+ # But preserve formatting for lists (comma-separated)
630
+ if ',' in answer and ' ' not in answer.replace(',', '').replace(' ', ''):
631
+ # Comma-separated list without spaces - keep as is
632
+ answer = answer.strip()
633
+ else:
634
+ answer = ' '.join(answer.split())
635
 
636
  # Remove markdown formatting if present
637
  if answer.startswith('**') and answer.endswith('**'):
638
+ answer = answer[2:-2].strip()
639
+ if answer.startswith('*') and answer.endswith('*') and not answer.startswith('**'):
640
+ answer = answer[1:-1].strip()
641
+
642
+ # Remove code block markers if present
643
+ if answer.startswith('```') and answer.endswith('```'):
644
+ lines = answer.split('\n')
645
+ if len(lines) > 2:
646
+ answer = '\n'.join(lines[1:-1]).strip()
647
+
648
+ # Final cleanup: remove any remaining explanation patterns at the end
649
+ answer = answer.split('\n')[0].strip() # Take first line only
650
+ answer = answer.split('.')[0].strip() if answer.count('.') > 1 else answer # Take first sentence if multiple
651
 
652
  return answer.strip()
653
 
 
666
  # Prepare the user message with file priority
667
  user_content = question
668
  if file_path and os.path.exists(file_path):
669
+ # Strongly emphasize reading the file first with detailed instructions
670
+ file_extension = os.path.splitext(file_path)[1].lower()
671
+ file_instructions = ""
672
+
673
+ if file_extension in ['.xlsx', '.xls', '.csv']:
674
+ file_instructions = "This is a spreadsheet file. Read it completely and examine ALL sheets (if Excel) and ALL columns. The answer is likely a number, date, name, or value extracted from this data. After reading, you may need to perform calculations or analysis using python_executor."
675
+ elif file_extension == '.pdf':
676
+ file_instructions = "This is a PDF file. Read ALL pages carefully. The answer may be anywhere in the document - in tables, text, or images. Search for keywords from the question."
677
+ else:
678
+ file_instructions = "This is a text-based file. Read it completely and carefully. The answer is likely somewhere in this file - look for exact values, names, dates, or information that matches the question."
679
+
680
+ user_content = f"""CRITICAL: A file is available at {file_path}
681
+
682
+ {file_instructions}
683
+
684
+ **You MUST read this file FIRST before doing anything else.** Do not search the web or use other tools until you have read the file completely. The answer is very likely in this file.
685
+
686
+ Question: {question}"""
687
 
688
  # Initialize state
689
  initial_state: AgentState = {
app.py CHANGED
@@ -240,7 +240,7 @@ def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
240
  **Score:** {score:.1%}
241
  **Correct:** {correct}/{total}
242
 
243
- {'🏆 Congratulations! You passed the 30% threshold!' if score >= 0.3 else '📈 Keep improving! You need 30% to earn your certificate.'}
244
  {warning_text}
245
 
246
  Check the [leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard) to see your ranking!
 
240
  **Score:** {score:.1%}
241
  **Correct:** {correct}/{total}
242
 
243
+ {'🏆 **Congratulations!** Your agent scored above 30% and has earned the certificate!' if score > 0.3 else ' **Certificate Requirement:** Your agent must score above 30% to earn your certificate. Current score is below the threshold.'}
244
  {warning_text}
245
 
246
  Check the [leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard) to see your ranking!