#!/usr/bin/env python3
"""Debug script for the "FINAL ANSWER:" prefix-stripping helper.

Running the script prints, for a set of sample agent responses, what the
cleaning helper returns, how several alternative regex patterns behave on
the same kind of input, and what a submission payload item built from the
cleaned text would look like.
"""
import re


def strip_final_answer_prefix(s: str) -> str:
    """Remove every "FINAL ANSWER:" marker from *s* and trim the result.

    The original note on this function says it is the current function
    from app.py, so its behaviour is intentionally left untouched here.

    The match is case-insensitive and also consumes whitespace that
    directly follows the marker. Text that appears *before* the marker is
    kept, so a response containing reasoning followed by the marker still
    returns the reasoning text.

    Args:
        s: Raw response text.

    Returns:
        The text with all markers removed and outer whitespace stripped.
    """
    return re.sub(r"FINAL ANSWER:\s*", "", s, flags=re.IGNORECASE).strip()


def test_answer_cleaning():
    """Print the cleaned form of several sample answers.

    A warning line is printed whenever the cleaned text equals the input,
    i.e. when no marker was removed and there was no outer whitespace.
    """
    print("=" * 60)
    print("Testing answer cleaning function...")
    print("=" * 60)

    test_cases = [
        "FINAL ANSWER: 42",
        "final answer: hello world",
        "This is my answer without prefix",
        "FINAL ANSWER:42",
        "The reasoning... FINAL ANSWER: The correct answer is Paris",
        "FINAL ANSWER: The answer is 123",
        "My reasoning leads me to conclude that FINAL ANSWER: 456",
        "Let me think step by step.\n\nFirst, I need to calculate 2+2.\n\nFINAL ANSWER: 4",
    ]

    for i, test in enumerate(test_cases, 1):
        cleaned = strip_final_answer_prefix(test)
        print(f"{i}. Input: '{test}'")
        print(f" Output: '{cleaned}'")
        print(f" Length: {len(cleaned)} chars")
        if cleaned == test:
            print(" WARNING: No cleaning occurred! Prefix not found or regex failed.")
        print("-" * 50)


def debug_regex_issue():
    """Print what several candidate regex patterns leave of one response.

    Each pattern is applied with ``re.sub`` (case-insensitive, replacing
    matches with the empty string) to the same sample response, and the
    stripped result is printed together with its length.
    """
    print("\n" + "=" * 60)
    print("Debugging regex patterns...")
    print("=" * 60)

    test_response = "Based on my calculation, FINAL ANSWER: 4"

    # Test different regex patterns
    patterns = [
        (r"FINAL ANSWER:\s*", "Current pattern"),
        (r"FINAL ANSWER:\s*(.*)$", "Capture group pattern"),
        (r".*FINAL ANSWER:\s*", "Remove everything up to prefix"),
        (r"^.*FINAL ANSWER:\s*", "From start to prefix"),
    ]

    for i, (pattern, description) in enumerate(patterns, 1):
        # Only the substitution can fail (on an invalid pattern), so the
        # try body is limited to it and only re.error is caught.
        try:
            result = re.sub(pattern, "", test_response, flags=re.IGNORECASE).strip()
        except re.error as e:
            print(f"{i}. {description} - ERROR: {e}")
        else:
            print(f"{i}. {description}")
            print(f" Pattern: {pattern}")
            print(f" Result: '{result}'")
            print(f" Length: {len(result)}")
        print("-" * 40)


def test_submission_format():
    """Print the payload item that each simulated response would produce.

    Every response is cleaned with ``strip_final_answer_prefix`` and put
    into a dict with ``task_id`` and ``submitted_answer`` keys; the dict is
    only printed, nothing is sent anywhere.
    """
    print("\n" + "=" * 60)
    print("Testing submission format...")
    print("=" * 60)

    # Simulate agent responses
    agent_responses = [
        "Let me calculate this step by step.\n\n2 + 2 = 4\n\nFINAL ANSWER: 4",
        "FINAL ANSWER: Paris",
        "After searching, I found that FINAL ANSWER: The answer is 42",
        "No final answer format here, just a direct response",
    ]

    for i, response in enumerate(agent_responses, 1):
        cleaned = strip_final_answer_prefix(response)
        payload_item = {"task_id": f"test_{i:03d}", "submitted_answer": cleaned}
        print(f"Response {i}:")
        # Long responses are shown truncated to their first 100 characters.
        print(f" Original: '{response[:100]}{'...' if len(response) > 100 else ''}'")
        print(f" Cleaned: '{cleaned}'")
        print(f" Payload: {payload_item}")
        print(f" Empty?: {not cleaned.strip()}")
        print("-" * 40)


if __name__ == "__main__":
    test_answer_cleaning()
    debug_regex_issue()
    test_submission_format()