Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Feb 10

Commit

ac72f74

verified ·

1 Parent(s): f6e496f

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -133

app.py CHANGED Viewed

@@ -97,6 +97,80 @@ def retry_with_backoff(max_retries=None, base_delay=None):
         return wrapper
     return decorator
 class SearchCache:
     """LRU cache for search results"""
@@ -517,120 +591,94 @@ class ValidateInput(BaseModel):
     proposed_answer: str = Field(description="Answer to validate")
     original_question: str = Field(description="Original question (first 100 chars)")
-@tool(args_schema=ValidateInput)
-def validate_answer(proposed_answer: str, original_question: str) -> str:
     """
-    ENHANCED: Validate answer before submission with comprehensive checks.
-    ALWAYS use before final_answer_tool.
     """
     start_time = time.time()
     try:
-        print(f"✓ Validating: '{proposed_answer[:50]}...'")
-        issues = []
         warnings = []
-        suggestions = []
-        # 1. Check conversational fluff
-        fluff = ["the answer is", "based on", "according to", "i found", "here is",
-                 "here's", "after searching", "from my research", "the result is"]
-        if any(p in proposed_answer.lower() for p in fluff):
-            issues.append("❌ Remove conversational text - answer ONLY")
-        # 2. Check code fences
-        if "```" in proposed_answer:
-            issues.append("❌ Remove code fences (```)")
-        # 3. Check markdown formatting
-        if proposed_answer.startswith('#') or '**' in proposed_answer:
-            issues.append("❌ Remove markdown formatting")
-        # 4. Check length appropriateness
-        question_lower = original_question.lower()
-        if len(proposed_answer) > 500:
-            if not any(k in question_lower for k in ['explain', 'describe', 'why', 'how does']):
-                warnings.append("⚠️ Answer very long. Question asks for short answer?")
-        # 5. Check for number questions
-        number_keywords = ["how many", "what number", "count", "total", "sum",
-                          "what year", "when did", "what date"]
-        if any(k in question_lower for k in number_keywords):
-            if not any(c.isdigit() for c in proposed_answer):
-                issues.append("❌ Question asks for number but answer has no digits")
-            else:
-                # Extract just the number(s)
-                import re
-                numbers = re.findall(r'\d+(?:\.\d+)?', proposed_answer)
-                if numbers and len(proposed_answer) > 50:
-                    suggestions.append(f"💡 Consider just the number(s): {', '.join(numbers)}")
-        # 6. Check for list questions
-        list_keywords = ["list", "what are", "name the", "which"]
-        if any(k in question_lower for k in list_keywords):
-            if '\n' in proposed_answer or len(proposed_answer.split(',')) > 1:
-                # Good, it's formatted as a list
-                pass
-            else:
-                warnings.append("⚠️ Question might ask for multiple items")
-        # 7. Check for yes/no questions
-        if question_lower.startswith(('is ', 'does ', 'did ', 'can ', 'will ', 'was ', 'were ', 'are ')):
-            if proposed_answer.lower() not in ['yes', 'no', 'true', 'false']:
-                if not proposed_answer.lower().startswith(('yes', 'no')):
-                    warnings.append("⚠️ Question seems yes/no. Answer should start with yes/no?")
-        # 8. Check for excessive punctuation
-        if proposed_answer.count('!') > 2 or proposed_answer.count('?') > 1:
-            issues.append("❌ Remove excessive punctuation")
-        # 9. Check for quotes around answer
-        if (proposed_answer.startswith('"') and proposed_answer.endswith('"')) or \
-           (proposed_answer.startswith("'") and proposed_answer.endswith("'")):
-            suggestions.append("💡 Consider removing quotes around answer")
-        # 10. Check for multiple sentences when one expected
-        sentences = [s.strip() for s in proposed_answer.split('.') if s.strip()]
-        if len(sentences) > 3:
-            if not any(k in question_lower for k in ['explain', 'describe', 'why', 'how']):
-                warnings.append("⚠️ Multiple sentences. Question asks for simple answer?")
-        # 11. Sanity check: is it empty?
-        if not proposed_answer.strip():
-            issues.append("❌ Answer is empty!")
-        # 12. Check for units in measurement questions
-        unit_keywords = ['height', 'weight', 'distance', 'speed', 'temperature', 'size']
-        if any(k in question_lower for k in unit_keywords):
-            has_unit = any(u in proposed_answer.lower() for u in
-                         ['km', 'miles', 'kg', 'lbs', 'cm', 'inches', 'celsius',
-                          'fahrenheit', 'mph', 'kph', 'meters', 'feet'])
-            if not has_unit and any(c.isdigit() for c in proposed_answer):
-                warnings.append("⚠️ Measurement question but no unit found")
-        # Build response
-        if issues:
-            result = "🚫 VALIDATION FAILED:\n" + "\n".join(issues)
-            if suggestions:
-                result += "\n\nSuggestions:\n" + "\n".join(suggestions)
-            result += "\n\nFix issues then retry validation."
-        elif warnings:
-            result = "⚠️ WARNINGS:\n" + "\n".join(warnings)
-            if suggestions:
-                result += "\n\nSuggestions:\n" + "\n".join(suggestions)
-            result += "\n\nProceed if confident, or refine answer."
-        elif suggestions:
-            result = "✅ PASSED with suggestions:\n" + "\n".join(suggestions)
-            result += "\n\nCall final_answer_tool() when ready."
         else:
-            result = "✅ VALIDATION PASSED! Call final_answer_tool() now."
         telemetry.record_call("validate_answer", time.time() - start_time, True)
         return result
     except Exception as e:
         telemetry.record_call("validate_answer", time.time() - start_time, False)
-        raise
 # =============================================================================
 # CORE TOOLS
@@ -1882,16 +1930,28 @@ class FinalAnswerInput(BaseModel):
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
-    """Submit final answer"""
     start_time = time.time()
     try:
-        print(f"✅ FINAL ANSWER: {answer}")
         telemetry.record_call("final_answer_tool", time.time() - start_time, True)
-        return answer
     except Exception as e:
         telemetry.record_call("final_answer_tool", time.time() - start_time, False)
-        raise
 # =============================================================================
@@ -2297,34 +2357,34 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
         self.current_llm = "groq"
         def prune_context_if_needed(state: AgentState) -> AgentState:
-        """
-        Prune conversation history if it's getting too long.
-        Keeps system message + recent history to stay under token limits.
-        """
-        messages = state.get("messages", [])
-        # Keep first message (system prompt) + last N messages
-        MAX_MESSAGES = 20  # Adjust based on your needs
-        if len(messages) > MAX_MESSAGES:
-            print(f"⚠️ Context pruning: {len(messages)} messages → {MAX_MESSAGES}")
-            # Always keep system message (if it exists)
-            system_msg = None
-            if messages and isinstance(messages[0], SystemMessage):
-                system_msg = messages[0]
-                messages = messages[1:]
-            # Keep only recent messages
-            recent_messages = messages[-(MAX_MESSAGES-1):]
-            # Reconstruct
-            if system_msg:
-                state["messages"] = [system_msg] + recent_messages
-            else:
-                state["messages"] = recent_messages
-        return state
         # Build agent graph
         def agent_node(state: AgentState):

         return wrapper
     return decorator
+def normalize_answer(answer: str, question: str = "") -> str:
+    """
+    Normalize an answer toward the exact-match format a grader expects.
+
+    Applied in order: trim whitespace, strip boilerplate prefixes, tidy
+    comma-separated lists, capitalize a lone yes/no/true/false/left/right,
+    expand St./Dr./Mt. when the question says "without abbreviations",
+    drop one trailing period, and unwrap matching quotes.
+
+    Args:
+        answer: The answer to normalize; empty or None is returned as-is.
+        question: Optional question text. List items are re-sorted only when
+            a question is given and it contains no ordering keyword; without
+            a question the original order is always kept.
+
+    Returns:
+        The normalized answer string.
+    """
+    import re  # function-scope import: the file's import block is not visible here
+    if not answer:
+        return answer
+    answer = answer.strip()
+    question_lower = (question or "").lower().strip()
+    # Remove common prefixes (colon variants listed first so the colon goes too)
+    prefixes_to_remove = [
+        "the answer is:",
+        "the answer is",
+        "answer:",
+        "final answer:",
+        "result:",
+    ]
+    for prefix in prefixes_to_remove:
+        if answer.lower().startswith(prefix):
+            answer = answer[len(prefix):].strip()
+    # Handle lists. A comma acting as a thousands separator ("1,000") belongs
+    # to a number, so such answers are not split; splitting and sorting used
+    # to turn "1,000" into "000, 1".
+    has_grouped_number = re.search(r"(?<!\d)\d{1,3}(?:,\d{3})+(?!\d)", answer)
+    if "," in answer and not has_grouped_number:
+        # Clean every item first so sorting and joining see the final text
+        items = [item.strip().rstrip('.').strip() for item in answer.split(",")]
+        items = [item for item in items if item]
+        # Determine if order matters based on question
+        order_matters_keywords = [
+            "first", "last", "before", "after", "sequence",
+            "order", "chronological", "oldest", "newest",
+            "in the form", "format"
+        ]
+        if not question_lower:
+            # Without the question there is no way to know whether order is
+            # significant, so never reorder blindly.
+            print("   📋 Kept original order (no question context)")
+        elif any(kw in question_lower for kw in order_matters_keywords):
+            print(f"   📋 Kept original order (question specifies order)")
+        else:
+            # Sort alphabetically for consistency, ignoring letter case
+            items.sort(key=str.casefold)
+            print(f"   📋 Sorted list alphabetically (order doesn't seem to matter)")
+        # Consistent spacing
+        answer = ", ".join(items)
+    # Single word capitalization
+    if len(answer.split()) == 1:
+        if answer.lower() in ['right', 'left', 'yes', 'no', 'true', 'false']:
+            answer = answer.capitalize()
+    # Handle "St." vs "Saint"
+    if "without abbreviations" in question_lower:
+        answer = answer.replace("St.", "Saint")
+        answer = answer.replace("Dr.", "Doctor")
+        answer = answer.replace("Mt.", "Mount")
+    # Remove a trailing period. No digit guard is needed: a real decimal such
+    # as "3.14" never ends in ".", while "42." is just a sentence-final period.
+    if answer.endswith('.'):
+        answer = answer[:-1]
+    # Remove wrapping quotes (the length guard keeps a lone quote character intact)
+    if len(answer) >= 2 and (
+        (answer.startswith('"') and answer.endswith('"'))
+        or (answer.startswith("'") and answer.endswith("'"))
+    ):
+        answer = answer[1:-1]
+    return answer
 class SearchCache:
     """LRU cache for search results"""
     proposed_answer: str = Field(description="Answer to validate")
     original_question: str = Field(description="Original question (first 100 chars)")
+# Format-checks a candidate answer and reports errors, warnings and
+# normalization suggestions as a single newline-joined string.
+# NOTE(review): ValidateAnswerInput is not visible in this diff; confirm it
+# declares a single `answer` field matching the signature below.
+# The docstring text is left untouched because it presumably doubles as the
+# tool description shown to the model - verify before rewording it.
+@tool(args_schema=ValidateAnswerInput)
+def validate_answer(answer: str) -> str:
     """
+    Validate answer format and provide warnings.
+    Returns validation result with normalization suggestions.
     """
     start_time = time.time()
     try:
+        print(f"✓ Validating: '{answer[:50]}...'")
         warnings = []
+        errors = []
+        normalization_needed = []
+        # Lower-cased whitespace tokens. Checks 3 and 6 match whole tokens so
+        # that "show" is not mistaken for "how", nor "first." for "st.".
+        lowered_tokens = answer.lower().split()
+        bare_words = {token.strip(".,;:!?\"'()[]") for token in lowered_tokens}
+        # Normalize for validation
+        normalized = normalize_answer(answer)
+        if normalized != answer:
+            normalization_needed.append(f"Consider using normalized form: '{normalized}'")
+        # Check 1: Empty answer
+        if not answer or not answer.strip():
+            errors.append("Answer is empty")
+        # Check 2: Too long (probably explaining instead of answering)
+        if len(answer) > 200:
+            warnings.append("Answer is very long (>200 chars). Consider if question asks for brief response.")
+        # Check 3: Contains question words (whole-word match only)
+        question_words = {'what', 'who', 'when', 'where', 'why', 'how', 'which'}
+        if bare_words & question_words:
+            warnings.append("Answer contains question words. Make sure you're providing the answer, not rephrasing the question.")
+        # Check 4: List ordering
+        if "," in answer:
+            items = [item.strip() for item in answer.split(",")]
+            if len(items) > 1:
+                warnings.append(f"List detected with {len(items)} items. Verify order matches question requirements.")
+        # Check 5: Capitalization consistency
+        if answer.lower() in ['right', 'left', 'yes', 'no', 'true', 'false']:
+            if not answer[0].isupper():
+                normalization_needed.append(f"Consider capitalizing: '{answer.capitalize()}'")
+        # Check 6: Abbreviations. The question text is not passed to this
+        # tool, so the former test for "without abbreviations" inside the
+        # answer itself could practically never fire; warn whenever a token
+        # starts with a known abbreviation instead.
+        abbreviations = ('st.', 'dr.', 'mt.')
+        if any(token.lstrip("\"'([").startswith(abbreviations) for token in lowered_tokens):
+            warnings.append("Question may ask for full form without abbreviations")
+        # Check 7: Spacing in lists
+        if "," in answer:
+            # Check for inconsistent spacing
+            if ", " in answer and "," in answer.replace(", ", ""):
+                normalization_needed.append("Inconsistent spacing in list. Use consistent ', ' format")
+        # Build result
+        result_parts = []
+        if errors:
+            result_parts.append("🚫 VALIDATION FAILED:")
+            for error in errors:
+                result_parts.append(f"❌ {error}")
+            result_parts.append("Fix issues then retry validation.")
         else:
+            result_parts.append("✅ VALIDATION PASSED!")
+            if normalization_needed:
+                result_parts.append("\n💡 NORMALIZATION SUGGESTIONS:")
+                for suggestion in normalization_needed:
+                    result_parts.append(f"   • {suggestion}")
+            if warnings:
+                result_parts.append("\n⚠️ WARNINGS:")
+                for warning in warnings:
+                    result_parts.append(f"⚠️ {warning}")
+                result_parts.append("Proceed if confident, or refine answer.")
+            else:
+                result_parts.append("Call final_answer_tool() now.")
+        result = "\n".join(result_parts)
         telemetry.record_call("validate_answer", time.time() - start_time, True)
         return result
     except Exception as e:
         telemetry.record_call("validate_answer", time.time() - start_time, False)
+        # Chain explicitly so the original traceback stays attached
+        raise ToolError("validate_answer", e) from e
 # =============================================================================
 # CORE TOOLS
 @tool(args_schema=FinalAnswerInput)
 def final_answer_tool(answer: str) -> str:
+    """Submit final answer with normalization"""
     start_time = time.time()
     try:
+        # The question is not available in this tool yet (it would have to be
+        # passed through from state), so normalization runs without it.
+        cleaned = normalize_answer(answer)
+        was_changed = cleaned != answer
+        if was_changed:
+            # Show both forms so a surprising rewrite is easy to spot in logs
+            print(f"📝 Normalized answer:")
+            print(f"   Before: '{answer}'")
+            print(f"   After:  '{cleaned}'")
+        print(f"\n✅ FINAL: '{cleaned}'\n")
         telemetry.record_call("final_answer_tool", time.time() - start_time, True)
+        return f"FINAL_ANSWER: {cleaned}"
     except Exception as e:
         telemetry.record_call("final_answer_tool", time.time() - start_time, False)
+        raise ToolError("final_answer_tool", e)
 # =============================================================================
         self.current_llm = "groq"
         def prune_context_if_needed(state: AgentState) -> AgentState:
+            """
+            Trim the conversation history once it exceeds MAX_MESSAGES.
+
+            A leading SystemMessage (if present) is always kept; the rest of
+            the budget is filled with the most recent messages, so the result
+            holds exactly MAX_MESSAGES entries after pruning. `state` is
+            updated in place and returned.
+            """
+            MAX_MESSAGES = 20  # Adjust based on your needs
+            messages = state.get("messages", [])
+            if len(messages) <= MAX_MESSAGES:
+                return state
+            print(f"⚠️ Context pruning: {len(messages)} messages → {MAX_MESSAGES}")
+            # Always keep system message (if it exists)
+            has_system = isinstance(messages[0], SystemMessage)
+            head = messages[:1] if has_system else []
+            # The system message uses one slot of the budget. Without one the
+            # whole budget goes to recent history; previously a slot was left
+            # unused, giving MAX_MESSAGES - 1 messages despite the log line.
+            tail_size = MAX_MESSAGES - len(head)
+            state["messages"] = head + messages[-tail_size:]
+            return state
         # Build agent graph
         def agent_node(state: AgentState):