Spaces:

jebaselvasingh
/

mycertification

Sleeping

App Files Community

jebaponselvasingh committed on Jan 16

Commit

e0ff305

1 Parent(s): e95a92e

Add application file

Browse files

Files changed (2) hide show

agent_enhanced.py +202 -33
app.py +1 -1

agent_enhanced.py CHANGED Viewed

@@ -313,34 +313,108 @@ Provide PRECISE, EXACT answers. The benchmark uses EXACT STRING MATCHING, so you
    - ✅ CORRECT: "Paris"
    - ❌ WRONG: "The answer is Paris because..."
-## Problem-Solving Strategy
-1. **Understand**: Read the question carefully. What exactly is being asked? Note any specific format requirements.
-2. **Check for File**: If a file is mentioned or available, ALWAYS read it FIRST - the answer is likely there.
-3. **Plan**: What information do I need? Which tools should I use?
-4. **Execute**: Use tools systematically. Verify information from multiple sources when possible.
-5. **Verify**: Double-check your answer format. Does it match the question's requirements? Is spelling correct?
-6. **Respond**: Give ONLY the final answer, no prefixes, no explanations.
 ## Available Tools
 - `read_file`: Read PDFs, spreadsheets, text files - USE THIS FIRST if a file is available
-- `web_search`: Current information, recent events, facts
-- `wikipedia_search`: Historical facts, biographies, definitions
-- `python_executor`: Calculations, data processing, analysis
-- `calculator`: Quick mathematical calculations
-## Tool Usage Priority
-1. **If file available**: Read file FIRST before doing anything else
-2. **For calculations**: Use python_executor for complex math, calculator for simple expressions
-3. **For facts**: Use wikipedia_search for established facts, web_search for current/recent information
-4. **Cross-reference**: When possible, verify important facts from multiple sources
 ## Critical Reminders
 - NEVER include "FINAL ANSWER:" or any prefix in your response
 - NEVER add explanations or context to your final answer
 - ALWAYS verify spelling, capitalization, and formatting
-- ALWAYS read files first if they are available
 - If uncertain about format, look for clues in the question itself
 - Never guess - use tools to find accurate information
 Remember: Your final message must contain ONLY the answer, nothing else. The scoring system uses exact string matching."""
@@ -354,7 +428,7 @@ class GAIAAgent:
         model_name: str = "gpt-4o",
         api_key: str = None,
         temperature: float = 0,
-        max_iterations: int = 15
     ):
         """
         Initialize the GAIA agent.
@@ -407,12 +481,15 @@ class GAIAAgent:
         messages = state["messages"]
         iteration = state.get("iteration_count", 0)
-        # Add iteration warnings earlier to give agent more time to finish
-        if iteration >= self.max_iterations - 3:
-            warning_msg = "WARNING: Approaching iteration limit. Please provide your final answer now. Remember: just the answer, no prefix."
             messages = list(messages) + [SystemMessage(content=warning_msg)]
         elif iteration >= self.max_iterations - 5:
-            reminder_msg = "Reminder: When you're ready to answer, provide ONLY the final answer with no prefix like 'FINAL ANSWER:' or 'The answer is:'"
             messages = list(messages) + [SystemMessage(content=reminder_msg)]
         try:
@@ -447,15 +524,46 @@ class GAIAAgent:
     def _extract_answer_node(self, state: AgentState) -> dict:
         """Extract and clean the final answer."""
-        last_message = state["messages"][-1]
         content = last_message.content if hasattr(last_message, "content") else str(last_message)
         answer = self._clean_answer(content)
         return {"final_answer": answer}
     def _clean_answer(self, raw_answer: str) -> str:
         """Clean and format the final answer for exact matching."""
         answer = raw_answer.strip()
         # Remove common prefixes (case-insensitive, with variations)
@@ -468,33 +576,78 @@ class GAIAAgent:
             "solution:", "solution", "solution is:",
             "the solution is:", "the solution is",
             "it is", "it's", "that is", "that's",
         ]
         answer_lower = answer.lower()
         for prefix in prefixes:
             if answer_lower.startswith(prefix):
                 answer = answer[len(prefix):].strip()
-                # Remove any leading colon or dash
-                answer = answer.lstrip(':').lstrip('-').strip()
                 answer_lower = answer.lower()
         # Remove quotes if they wrap the entire answer
         if (answer.startswith('"') and answer.endswith('"')) or \
            (answer.startswith("'") and answer.endswith("'")):
             answer = answer[1:-1].strip()
         # Remove trailing periods, commas, or semicolons for single-word/number answers
         if answer and ' ' not in answer:
-            answer = answer.rstrip('.,;:')
         # Remove leading/trailing whitespace and normalize internal whitespace
-        answer = ' '.join(answer.split())
         # Remove markdown formatting if present
         if answer.startswith('**') and answer.endswith('**'):
-            answer = answer[2:-2]
-        if answer.startswith('*') and answer.endswith('*'):
-            answer = answer[1:-1]
         return answer.strip()
@@ -513,8 +666,24 @@ class GAIAAgent:
         # Prepare the user message with file priority
         user_content = question
         if file_path and os.path.exists(file_path):
-            # Strongly emphasize reading the file first
-            user_content = f"[IMPORTANT: A file is available at {file_path}]\n\nYou MUST read this file FIRST using the read_file tool before attempting to answer. The answer is very likely contained in this file.\n\nQuestion: {question}"
         # Initialize state
         initial_state: AgentState = {

    - ✅ CORRECT: "Paris"
    - ❌ WRONG: "The answer is Paris because..."
+## Detailed Problem-Solving Strategy
+### Step 1: Analyze the Question
+- Read the question word-by-word. What exactly is being asked?
+- Identify keywords: "what", "who", "when", "where", "how many", "calculate", "find"
+- Note any format requirements or constraints mentioned in the question
+- Check if the question references specific data, files, or time periods
+### Step 2: File Priority (CRITICAL)
+- If a file is mentioned or available, you MUST read it FIRST before any other action
+- Files often contain the exact answer or the data needed to calculate it
+- After reading the file, carefully search through ALL content - don't miss details
+- For Excel/CSV files, examine ALL sheets and ALL columns
+- For PDFs, read ALL pages - answers can be anywhere in the document
+### Step 3: Plan Your Approach
+- Based on the question type, decide which tools you need:
+  - **Data extraction from file**: read_file (then possibly python_executor for analysis)
+  - **Mathematical calculations**: python_executor or calculator
+  - **Historical/factual information**: wikipedia_search first, then web_search if needed
+  - **Current/recent information**: web_search
+  - **Complex data analysis**: python_executor with pandas/numpy
+- Create a step-by-step plan before executing
+### Step 4: Execute Systematically
+- Use ONE tool at a time, wait for results
+- For file-based questions: read file → extract relevant data → calculate/analyze → verify
+- For fact-based questions: search → verify from multiple sources if possible → extract exact answer
+- For calculation questions: gather inputs → perform calculation → double-check math
+- If initial search doesn't yield results, try different query keywords
+### Step 5: Verify and Cross-Check
+- Verify your answer matches what was asked
+- For names: double-check spelling, capitalization, punctuation
+- For numbers: verify calculations, check units, ensure precision
+- For dates: verify format matches question requirements
+- If you found information from one source, try to verify with another if time permits
+- For lists: ensure proper comma-separated format with NO spaces
+### Step 6: Format Correctly
+- Remove ALL prefixes ("FINAL ANSWER:", "The answer is:", etc.)
+- Remove ALL explanations and context
+- Ensure exact formatting (spaces, commas, capitalization)
+- Double-check: is this the EXACT format the question expects?
 ## Available Tools
 - `read_file`: Read PDFs, spreadsheets, text files - USE THIS FIRST if a file is available
+- `web_search`: Current information, recent events, facts (use for recent/current info)
+- `wikipedia_search`: Historical facts, biographies, definitions (use for established facts)
+- `python_executor`: Calculations, data processing, analysis (use for complex calculations or data analysis)
+- `calculator`: Quick mathematical calculations (use for simple arithmetic)
+## Tool Usage Guidelines
+### Reading Files (HIGHEST PRIORITY)
+- ALWAYS read files FIRST if available
+- For Excel files: check ALL sheets, read ALL relevant columns
+- For PDFs: read ALL pages, search for keywords from the question
+- For CSV files: examine ALL rows, look for patterns
+- Extract numbers, names, dates EXACTLY as they appear
+### Web Search Strategy
+- Use specific, targeted queries with key terms from the question
+- If first search doesn't help, try rephrasing with different keywords
+- Look for official sources, authoritative websites
+- Extract exact values (numbers, names) - don't round or approximate
+### Wikipedia Search Strategy
+- Use exact terms or names from the question
+- Read the summary/intro carefully - it often contains the answer
+- Check spelling, capitalization, dates exactly as shown
+- For biographical questions, search for the person's name
+### Python Execution
+- Use for calculations, data analysis, or processing file contents
+- Be explicit with calculations - show your work in code
+- Use appropriate precision - don't round unnecessarily
+- Print the final result clearly
+### Calculator
+- Use for simple arithmetic operations
+- Preserve precision - use exact fractions if possible
+- Format output correctly (integers as integers, decimals as needed)
 ## Critical Reminders
 - NEVER include "FINAL ANSWER:" or any prefix in your response
 - NEVER add explanations or context to your final answer
 - ALWAYS verify spelling, capitalization, and formatting
+- ALWAYS read files first if they are available - don't skip this step
+- For file-based questions, the answer is almost always in the file
+- Extract exact values - don't approximate or round unless necessary
 - If uncertain about format, look for clues in the question itself
 - Never guess - use tools to find accurate information
+- Use multiple tools if needed - don't stop after the first result if unsure
+- Cross-reference important facts when possible
+## When You're Ready to Answer
+- Review your final answer one more time
+- Ensure it's formatted correctly (no prefixes, no explanations)
+- Ensure spelling, capitalization, and punctuation are exact
+- Ensure numbers are precise
+- When satisfied, respond with ONLY the answer - nothing else
 Remember: Your final message must contain ONLY the answer, nothing else. The scoring system uses exact string matching."""
         model_name: str = "gpt-4o",
         api_key: str = None,
         temperature: float = 0,
+        max_iterations: int = 25
     ):
         """
         Initialize the GAIA agent.
         messages = state["messages"]
         iteration = state.get("iteration_count", 0)
+        # Add iteration warnings to guide the agent
+        if iteration >= self.max_iterations - 2:
+            warning_msg = "⚠️ CRITICAL: You have reached the iteration limit. You MUST provide your final answer NOW in your next response. Format: ONLY the answer itself, no prefixes like 'FINAL ANSWER:' or 'The answer is:' - just the answer."
             messages = list(messages) + [SystemMessage(content=warning_msg)]
         elif iteration >= self.max_iterations - 5:
+            warning_msg = "⚠️ WARNING: Approaching iteration limit. Start wrapping up and provide your final answer soon. Remember: just the answer, no prefix."
+            messages = list(messages) + [SystemMessage(content=warning_msg)]
+        elif iteration >= self.max_iterations - 8:
+            reminder_msg = "Reminder: When you're ready to answer, provide ONLY the final answer with no prefix like 'FINAL ANSWER:' or 'The answer is:'. Check your answer format carefully."
             messages = list(messages) + [SystemMessage(content=reminder_msg)]
         try:
     def _extract_answer_node(self, state: AgentState) -> dict:
         """Extract and clean the final answer from the conversation state.

         The last message is treated as the authoritative answer whenever it
         has any non-whitespace text, so short answers such as "7" or "No"
         are kept. Only when the last message is empty does the method fall
         back to earlier messages:

         1. Scan the five most recent messages, newest first, for a line
            containing an answer marker ("the answer is", "result is",
            "found:", "value:", "equals") and use the first such line.
         2. Otherwise use the most recent message with non-empty text.

         Args:
             state: Agent state; only its "messages" entry is read.

         Returns:
             A dict with a single "final_answer" key holding the result of
             passing the selected text through ``self._clean_answer``.
         """
         answer_markers = ("the answer is", "result is", "found:", "value:", "equals")

         def _text(message) -> str:
             # Content that is not a plain string cannot be scanned line by
             # line, so it is treated as empty instead of raising.
             # NOTE(review): assumes non-string content is a list of content
             # parts that never holds the final answer - confirm.
             raw = message.content if hasattr(message, "content") else str(message)
             return raw if isinstance(raw, str) else ""

         def _marker_line(text: str):
             # Same pre-filter as before: only inspect messages that mention
             # "result" or "answer" at all.
             lowered = text.lower()
             if "result" not in lowered and "answer" not in lowered:
                 return None
             for line in text.split('\n'):
                 if any(marker in line.lower() for marker in answer_markers):
                     return line
             return None

         messages = state["messages"]
         texts = [_text(message) for message in messages]
         # An empty message list yields an empty answer instead of IndexError.
         content = texts[-1] if texts else ""

         if not content.strip():
             newest_first = texts[::-1]
             # Stop at the first (most recent) hit so an older tool response
             # cannot overwrite a newer one.
             hits = (_marker_line(text) for text in newest_first[:5])
             content = next((line for line in hits if line is not None), None)
             if content is None:
                 content = next((text for text in newest_first if text.strip()), "")

         answer = self._clean_answer(content)
         return {"final_answer": answer}
     def _clean_answer(self, raw_answer: str) -> str:
         """Clean and format the final answer for exact matching."""
+        if not raw_answer:
+            return ""
         answer = raw_answer.strip()
         # Remove common prefixes (case-insensitive, with variations)
             "solution:", "solution", "solution is:",
             "the solution is:", "the solution is",
             "it is", "it's", "that is", "that's",
+            "the value is:", "the value is", "value is:",
+            "the result is:", "the result is",
+            "found:", "found", "equals:", "equals", "is:",
+            "according to the", "based on the", "from the",
         ]
         answer_lower = answer.lower()
         for prefix in prefixes:
             if answer_lower.startswith(prefix):
                 answer = answer[len(prefix):].strip()
+                # Remove any leading colon, dash, or space
+                answer = answer.lstrip(':').lstrip('-').lstrip().strip()
                 answer_lower = answer.lower()
+        # Remove explanations after the answer (look for common patterns)
+        # Split by common explanation starters
+        explanation_markers = [" because", " since", " as", " due to", " which", " that", " - ", " (", " [", "\n\n"]
+        for marker in explanation_markers:
+            if marker in answer:
+                # For some markers, split and take first part
+                if marker in [" - ", "\n\n"]:
+                    answer = answer.split(marker)[0].strip()
+                # For parentheses/brackets, be more careful
+                elif marker in [" (", " ["]:
+                    # Only remove if it looks like an explanation
+                    idx = answer.find(marker)
+                    if idx > 0 and idx < len(answer) - 3:  # Not at start/end
+                        # Check if it's likely an explanation (has words, not just numbers/dates)
+                        rest = answer[idx+1:]
+                        if any(char.isalpha() for char in rest[:20]):  # Has letters in first 20 chars
+                            answer = answer[:idx].strip()
+                else:
+                    # For words like "because", split and take first part
+                    parts = answer.split(marker, 1)
+                    if len(parts) > 1:
+                        answer = parts[0].strip()
         # Remove quotes if they wrap the entire answer
         if (answer.startswith('"') and answer.endswith('"')) or \
            (answer.startswith("'") and answer.endswith("'")):
             answer = answer[1:-1].strip()
         # Remove trailing periods, commas, or semicolons for single-word/number answers
+        # But preserve trailing punctuation for dates or other formatted answers
         if answer and ' ' not in answer:
+            # Don't remove trailing punctuation if it's part of a date format or URL
+            if not (answer.count('-') == 2 or answer.count('/') == 2 or '://' in answer):
+                answer = answer.rstrip('.,;:')
         # Remove leading/trailing whitespace and normalize internal whitespace
+        # But preserve formatting for lists (comma-separated)
+        if ',' in answer and ' ' not in answer.replace(',', '').replace(' ', ''):
+            # Comma-separated list without spaces - keep as is
+            answer = answer.strip()
+        else:
+            answer = ' '.join(answer.split())
         # Remove markdown formatting if present
         if answer.startswith('**') and answer.endswith('**'):
+            answer = answer[2:-2].strip()
+        if answer.startswith('*') and answer.endswith('*') and not answer.startswith('**'):
+            answer = answer[1:-1].strip()
+        # Remove code block markers if present
+        if answer.startswith('```') and answer.endswith('```'):
+            lines = answer.split('\n')
+            if len(lines) > 2:
+                answer = '\n'.join(lines[1:-1]).strip()
+        # Final cleanup: remove any remaining explanation patterns at the end
+        answer = answer.split('\n')[0].strip()  # Take first line only
+        answer = answer.split('.')[0].strip() if answer.count('.') > 1 else answer  # Take first sentence if multiple
         return answer.strip()
         # Prepare the user message with file priority
         user_content = question
         if file_path and os.path.exists(file_path):
+            # Strongly emphasize reading the file first with detailed instructions
+            file_extension = os.path.splitext(file_path)[1].lower()
+            file_instructions = ""
+            if file_extension in ['.xlsx', '.xls', '.csv']:
+                file_instructions = "This is a spreadsheet file. Read it completely and examine ALL sheets (if Excel) and ALL columns. The answer is likely a number, date, name, or value extracted from this data. After reading, you may need to perform calculations or analysis using python_executor."
+            elif file_extension == '.pdf':
+                file_instructions = "This is a PDF file. Read ALL pages carefully. The answer may be anywhere in the document - in tables, text, or images. Search for keywords from the question."
+            else:
+                file_instructions = "This is a text-based file. Read it completely and carefully. The answer is likely somewhere in this file - look for exact values, names, dates, or information that matches the question."
+            user_content = f"""CRITICAL: A file is available at {file_path}
+{file_instructions}
+**You MUST read this file FIRST before doing anything else.** Do not search the web or use other tools until you have read the file completely. The answer is very likely in this file.
+Question: {question}"""
         # Initialize state
         initial_state: AgentState = {

app.py CHANGED Viewed

@@ -240,7 +240,7 @@ def submit_to_leaderboard(username: str, space_url: str, answers_json: str):
 **Score:** {score:.1%}
 **Correct:** {correct}/{total}
-{'🏆 Congratulations! You passed the 30% threshold!' if score >= 0.3 else '📈 Keep improving! You need 30% to earn your certificate.'}
 {warning_text}
 Check the [leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard) to see your ranking!

 **Score:** {score:.1%}
 **Correct:** {correct}/{total}
+{'🏆 **Congratulations!** Your agent scored above 30% and has earned the certificate!' if score > 0.3 else '❌ **Certificate Requirement:** Your agent must score above 30% to earn your certificate. Current score is below the threshold.'}
 {warning_text}
 Check the [leaderboard](https://huggingface.co/spaces/agents-course/Students_leaderboard) to see your ranking!