Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| import re | |
def strip_final_answer_prefix(s: str) -> str:
    """Remove every "FINAL ANSWER:" marker from *s* and trim the result.

    Copy of the current function from app.py, kept here so its behavior
    can be inspected in isolation.

    The marker is matched case-insensitively, together with any whitespace
    that directly follows it, wherever it occurs in the string. Only the
    marker itself is removed: text that precedes it (for example the
    reasoning leading up to the answer) is kept in the result.

    Args:
        s: Raw response text.

    Returns:
        The text with the marker(s) removed and leading/trailing whitespace
        stripped. Input without a marker comes back stripped but otherwise
        unchanged.
    """
    marker = re.compile(r"FINAL ANSWER:\s*", flags=re.IGNORECASE)
    without_marker = marker.sub("", s)
    return without_marker.strip()
def test_answer_cleaning():
    """Print how strip_final_answer_prefix transforms a set of sample inputs.

    For each sample the input, the cleaned output and the output length are
    printed. A warning line is added whenever the cleaned text is identical
    to the input, i.e. the call changed nothing. Nothing is asserted or
    returned; the output is meant to be read by a person.
    """
    banner = "=" * 60
    print(banner)
    print("Testing answer cleaning function...")
    print(banner)

    # Samples cover: marker at the start, lowercase marker, no marker at all,
    # marker without a following space, and marker preceded by reasoning
    # (single-line and multi-line).
    samples = (
        "FINAL ANSWER: 42",
        "final answer: hello world",
        "This is my answer without prefix",
        "FINAL ANSWER:42",
        "The reasoning... FINAL ANSWER: The correct answer is Paris",
        "FINAL ANSWER: The answer is 123",
        "My reasoning leads me to conclude that FINAL ANSWER: 456",
        "Let me think step by step.\n\nFirst, I need to calculate 2+2.\n\nFINAL ANSWER: 4",
    )

    separator = "-" * 50
    for number, raw in enumerate(samples, start=1):
        result = strip_final_answer_prefix(raw)
        untouched = result == raw

        print(f"{number}. Input: '{raw}'")
        print(f" Output: '{result}'")
        print(f" Length: {len(result)} chars")
        if untouched:
            print(" WARNING: No cleaning occurred! Prefix not found or regex failed.")
        print(separator)
def debug_regex_issue():
    """Print the result of several candidate regexes on one sample response.

    Each candidate pattern is applied with re.IGNORECASE to a fixed sample
    string, every match is replaced by the empty string, and the stripped
    result plus its length are printed. If applying a pattern raises, the
    error is printed for that pattern and the loop moves on to the next one.
    Nothing is returned.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Debugging regex patterns...")
    print(banner)

    sample = "Based on my calculation, FINAL ANSWER: 4"

    # (pattern, human-readable label) pairs to compare side by side.
    candidates = (
        (r"FINAL ANSWER:\s*", "Current pattern"),
        (r"FINAL ANSWER:\s*(.*)$", "Capture group pattern"),
        (r".*FINAL ANSWER:\s*", "Remove everything up to prefix"),
        (r"^.*FINAL ANSWER:\s*", "From start to prefix"),
    )

    separator = "-" * 40
    for number, candidate in enumerate(candidates, start=1):
        regex_text, label = candidate
        try:
            compiled = re.compile(regex_text, flags=re.IGNORECASE)
            outcome = compiled.sub("", sample).strip()
            print(f"{number}. {label}")
            print(f" Pattern: {regex_text}")
            print(f" Result: '{outcome}'")
            print(f" Length: {len(outcome)}")
        except Exception as e:
            # Best-effort diagnostic: report the failure and keep going.
            print(f"{number}. {label} - ERROR: {e}")
        print(separator)
def test_submission_format():
    """Print the payload item that would be built for sample agent responses.

    Each simulated response is passed through strip_final_answer_prefix and
    wrapped in a dict with "task_id" and "submitted_answer" keys. The
    original text (truncated to 100 characters for display), the cleaned
    text, the payload dict, and whether the cleaned text is empty are
    printed. Nothing is sent anywhere and nothing is returned.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing submission format...")
    print(banner)

    # Simulated agent outputs: multi-line reasoning, bare marker, marker
    # after inline reasoning, and a response with no marker at all.
    simulated = (
        "Let me calculate this step by step.\n\n2 + 2 = 4\n\nFINAL ANSWER: 4",
        "FINAL ANSWER: Paris",
        "After searching, I found that FINAL ANSWER: The answer is 42",
        "No final answer format here, just a direct response",
    )

    separator = "-" * 40
    for number, raw in enumerate(simulated, start=1):
        answer = strip_final_answer_prefix(raw)
        item = {"task_id": f"test_{number:03d}", "submitted_answer": answer}

        # Show at most 100 characters, with an ellipsis when truncated.
        ellipsis = "..." if len(raw) > 100 else ""
        preview = raw[:100] + ellipsis
        is_empty = not answer.strip()

        print(f"Response {number}:")
        print(f" Original: '{preview}'")
        print(f" Cleaned: '{answer}'")
        print(f" Payload: {item}")
        print(f" Empty?: {is_empty}")
        print(separator)
if __name__ == "__main__":
    # Run the three diagnostics in a fixed order when executed as a script.
    for diagnostic in (
        test_answer_cleaning,
        debug_regex_issue,
        test_submission_format,
    ):
        diagnostic()