Spaces:
Sleeping
Sleeping
File size: 3,213 Bytes
3874cd4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
#!/usr/bin/env python3
import re
def strip_final_answer_prefix(s: str) -> str:
    """Remove every "FINAL ANSWER:" marker from *s* and trim the result.

    Per the original note on this function, it is a copy of the cleaning
    function currently used in app.py, kept here so its behaviour can be
    inspected in isolation.

    The marker is matched case-insensitively, together with any whitespace
    that directly follows it. Only the marker itself is deleted: any text
    that appears before the marker (for example, reasoning) stays in the
    result.

    Args:
        s: Raw response text.

    Returns:
        The text with all markers removed and leading/trailing whitespace
        stripped. Text without a marker is returned stripped but otherwise
        unchanged.
    """
    return re.sub(r"FINAL ANSWER:\s*", "", s, flags=re.IGNORECASE).strip()
def test_answer_cleaning():
    """Print the result of cleaning a fixed set of sample responses.

    For each sample, the input, the cleaned output and the cleaned length
    are printed. A warning is printed when the cleaned text is identical to
    the input, i.e. when neither the marker removal nor the whitespace
    stripping changed anything.

    The function only prints; it returns nothing and asserts nothing.
    """
    banner = "=" * 60
    print(banner)
    print("Testing answer cleaning function...")
    print(banner)

    test_cases = [
        "FINAL ANSWER: 42",
        "final answer: hello world",
        "This is my answer without prefix",
        "FINAL ANSWER:42",
        "The reasoning... FINAL ANSWER: The correct answer is Paris",
        "FINAL ANSWER: The answer is 123",
        "My reasoning leads me to conclude that FINAL ANSWER: 456",
        "Let me think step by step.\n\nFirst, I need to calculate 2+2.\n\nFINAL ANSWER: 4",
    ]

    for i, test in enumerate(test_cases, 1):
        cleaned = strip_final_answer_prefix(test)
        print(f"{i}. Input: '{test}'")
        print(f" Output: '{cleaned}'")
        print(f" Length: {len(cleaned)} chars")
        # An unchanged string means no marker was removed and there was no
        # surrounding whitespace to strip.
        if cleaned == test:
            print(" WARNING: No cleaning occurred! Prefix not found or regex failed.")
        print("-" * 50)
def debug_regex_issue():
    """Print what several candidate regex patterns leave of one response.

    Each pattern is applied to the same sample response with ``re.sub``
    (replacement is the empty string, matching is case-insensitive) and the
    stripped result is printed with its length. A pattern that fails to
    compile is reported on a single ERROR line instead of aborting the run.

    The function only prints; it returns nothing.
    """
    print("\n" + "=" * 60)
    print("Debugging regex patterns...")
    print("=" * 60)

    test_response = "Based on my calculation, FINAL ANSWER: 4"

    # Candidate patterns, each paired with a label used in the output.
    patterns = [
        (r"FINAL ANSWER:\s*", "Current pattern"),
        (r"FINAL ANSWER:\s*(.*)$", "Capture group pattern"),
        (r".*FINAL ANSWER:\s*", "Remove everything up to prefix"),
        (r"^.*FINAL ANSWER:\s*", "From start to prefix"),
    ]

    for i, (pattern, description) in enumerate(patterns, 1):
        # Only the substitution can raise; keep the try body to that call
        # and catch the regex-specific error rather than every Exception.
        try:
            result = re.sub(pattern, "", test_response, flags=re.IGNORECASE).strip()
        except re.error as e:
            print(f"{i}. {description} - ERROR: {e}")
        else:
            print(f"{i}. {description}")
            print(f" Pattern: {pattern}")
            print(f" Result: '{result}'")
            print(f" Length: {len(result)}")
        print("-" * 40)
def test_submission_format():
    """Print the payload item that would be built for sample agent responses.

    Each simulated response is cleaned with ``strip_final_answer_prefix``
    and placed in a dict with the keys ``task_id`` and ``submitted_answer``.
    The original text (truncated to 100 characters), the cleaned text, the
    payload dict and whether the cleaned text is empty are printed.

    Nothing is sent anywhere; the function only prints and returns nothing.
    """
    print("\n" + "=" * 60)
    print("Testing submission format...")
    print("=" * 60)

    # Simulate agent responses
    agent_responses = [
        "Let me calculate this step by step.\n\n2 + 2 = 4\n\nFINAL ANSWER: 4",
        "FINAL ANSWER: Paris",
        "After searching, I found that FINAL ANSWER: The answer is 42",
        "No final answer format here, just a direct response",
    ]

    for i, response in enumerate(agent_responses, 1):
        cleaned = strip_final_answer_prefix(response)
        payload_item = {"task_id": f"test_{i:03d}", "submitted_answer": cleaned}
        print(f"Response {i}:")
        # Show at most 100 characters of the original, with an ellipsis
        # when the text was cut.
        print(f" Original: '{response[:100]}{'...' if len(response) > 100 else ''}'")
        print(f" Cleaned: '{cleaned}'")
        print(f" Payload: {payload_item}")
        # Truthiness check: an empty (or whitespace-only) answer prints True.
        print(f" Empty?: {not cleaned.strip()}")
        print("-" * 40)
# Run all three diagnostic routines, in order, when executed as a script.
if __name__ == "__main__":
    test_answer_cleaning()
    debug_regex_issue()
    test_submission_format()