File size: 3,213 Bytes
3874cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python3

import re

def strip_final_answer_prefix(s: str) -> str:
    """Drop every "FINAL ANSWER:" marker from *s* and trim the result.

    Copy of the helper as it currently stands in app.py (per the original
    note on this function). The marker is matched case-insensitively and
    any whitespace directly after the colon is removed with it. Text that
    appears before the marker is NOT removed; only the marker itself is.

    Args:
        s: Raw answer text.

    Returns:
        The text with all markers removed and surrounding whitespace
        stripped.
    """
    marker = re.compile(r"FINAL ANSWER:\s*", re.IGNORECASE)
    without_marker = marker.sub("", s)
    return without_marker.strip()

def test_answer_cleaning():
    """Print how the cleaning helper transforms a set of sample answers.

    For each sample the input, the cleaned output and the cleaned length
    are printed. A warning line is added when the cleaned text equals the
    input, i.e. when the helper changed nothing.
    """
    banner = "=" * 60
    separator = "-" * 50

    print(banner)
    print("Testing answer cleaning function...")
    print(banner)

    samples = (
        "FINAL ANSWER: 42",
        "final answer: hello world",
        "This is my answer without prefix",
        "FINAL ANSWER:42",
        "The reasoning... FINAL ANSWER: The correct answer is Paris",
        "FINAL ANSWER: The answer is 123",
        "My reasoning leads me to conclude that FINAL ANSWER: 456",
        "Let me think step by step.\n\nFirst, I need to calculate 2+2.\n\nFINAL ANSWER: 4",
    )

    for index, raw in enumerate(samples, start=1):
        result = strip_final_answer_prefix(raw)
        report = [
            f"{index}. Input:  '{raw}'",
            f"   Output: '{result}'",
            f"   Length: {len(result)} chars",
        ]
        # An unchanged string means the helper had nothing to remove.
        if result == raw:
            report.append("   WARNING: No cleaning occurred! Prefix not found or regex failed.")
        report.append(separator)
        print("\n".join(report))

def debug_regex_issue():
    """Print what several candidate regexes leave behind on one sample.

    Each candidate pattern is applied with ``re.sub`` (case-insensitive,
    replacing matches with the empty string) to a fixed sample response.
    The stripped result and its length are printed, or an error line if
    applying the pattern raises.
    """
    rule = "=" * 60
    divider = "-" * 40

    print("\n" + rule)
    print("Debugging regex patterns...")
    print(rule)

    sample = "Based on my calculation, FINAL ANSWER: 4"

    # (pattern, human-readable label) pairs to compare side by side.
    candidates = (
        (r"FINAL ANSWER:\s*", "Current pattern"),
        (r"FINAL ANSWER:\s*(.*)$", "Capture group pattern"),
        (r".*FINAL ANSWER:\s*", "Remove everything up to prefix"),
        (r"^.*FINAL ANSWER:\s*", "From start to prefix"),
    )

    for number, (regex, label) in enumerate(candidates, start=1):
        try:
            stripped = re.sub(regex, "", sample, flags=re.IGNORECASE).strip()
        except Exception as exc:
            print(f"{number}. {label} - ERROR: {exc}")
        else:
            print(f"{number}. {label}")
            print(f"   Pattern: {regex}")
            print(f"   Result: '{stripped}'")
            print(f"   Length: {len(stripped)}")
        print(divider)

def test_submission_format():
    """Print the payload entry that each simulated agent response yields.

    Every sample response is passed through the cleaning helper and
    wrapped in a dict with ``task_id`` and ``submitted_answer`` keys. The
    original text (first 100 characters), the cleaned text, the dict and
    whether the cleaned text is empty are printed.
    """
    rule = "=" * 60
    divider = "-" * 40

    print("\n" + rule)
    print("Testing submission format...")
    print(rule)

    # Stand-ins for what an agent might return.
    simulated = (
        "Let me calculate this step by step.\n\n2 + 2 = 4\n\nFINAL ANSWER: 4",
        "FINAL ANSWER: Paris",
        "After searching, I found that FINAL ANSWER: The answer is 42",
        "No final answer format here, just a direct response",
    )

    for number, raw in enumerate(simulated, start=1):
        answer = strip_final_answer_prefix(raw)
        entry = {"task_id": f"test_{number:03d}", "submitted_answer": answer}

        # Show at most 100 characters of the original, marking truncation.
        preview = raw[:100]
        ellipsis = "..." if len(raw) > 100 else ""
        is_empty = not answer.strip()

        print(
            "\n".join(
                (
                    f"Response {number}:",
                    f"  Original: '{preview}{ellipsis}'",
                    f"  Cleaned:  '{answer}'",
                    f"  Payload:  {entry}",
                    f"  Empty?:   {is_empty}",
                    divider,
                )
            )
        )

if __name__ == "__main__":
    # Run the three diagnostic passes in their original order.
    for diagnostic in (test_answer_cleaning, debug_regex_issue, test_submission_format):
        diagnostic()