Spaces:
Running on Zero
Running on Zero
| import re | |
| import traceback | |
| from megatron.rl.agent.reward_only_agent import PassAtEvaluationAgent | |
# Import the answer checker (`parse` / `verify`) and record on stdout and in
# MATHVERIFY_AVAILABLE whether the import succeeded.
try:
    from math_verify import parse, verify
except ImportError:
    MATHVERIFY_AVAILABLE = False
    print(
        "math_verify is not installed. Install it using `pip install math-verify`. "
        "Continuing using exact match verification."
    )
else:
    MATHVERIFY_AVAILABLE = True
    print("math_verify is installed. Using math_verify to verify answers.")

# NOTE(review): the ImportError message above still promises an exact-match
# fallback, but this assertion aborts the import when math_verify is missing.
# Under `python -O` the assertion is stripped and `parse` / `verify` would be
# undefined at call time instead.
assert MATHVERIFY_AVAILABLE, (
    "math_verify is not installed but now required. "
    "Install it using `pip install math-verify` to continue."
)

# Score MathAgent.compute_score returns when no answer can be extracted from
# the response or the extracted answer fails to parse.
NEGATIVE_REWARD = 0.0
class MathAgent(PassAtEvaluationAgent):
    """Agent that builds math-problem prompts and scores model responses.

    A response earns 1.0 when its final answer is verified against the golden
    answer, ``format_reward`` when an answer was extracted and parsed but not
    verified as correct, and ``NEGATIVE_REWARD`` when no answer could be
    extracted or parsing raised ``ValueError``.
    """

    # Tried in order; the first pattern that matches anywhere in the response
    # wins. The boxed pattern tolerates up to two levels of nested braces
    # inside ``\boxed{...}``.
    _ANSWER_PATTERNS = (
        re.compile(r'<answer>(.*?)</answer>', re.DOTALL),
        re.compile(r"\\boxed\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}", re.DOTALL),
    )

    def __init__(self, format_reward: float = 0.0, answer_format: str = "tagged", **kwargs) -> None:
        """Configure the agent.

        Args:
            format_reward: Score returned by ``compute_score`` for a response
                whose answer was extracted and parsed but is not correct.
            answer_format: ``"tagged"`` or ``"boxed"``; selects the answer
                instruction that ``make_prefix`` puts into the prompt.
            **kwargs: Forwarded unchanged to ``PassAtEvaluationAgent``.
        """
        super().__init__(**kwargs)
        assert answer_format in ["tagged", "boxed"], "Invalid answer format"
        self.format_reward = format_reward
        self.answer_format = answer_format

    def _extract_final_answer(self, response: str):
        """Return the last tagged (else last boxed) answer in *response*, or None."""
        # Both formats are accepted regardless of ``self.answer_format``
        # (the original author notes this somewhat favors DeepSeek-distilled
        # models, which tend to answer with \boxed{}).
        for pattern in self._ANSWER_PATTERNS:
            # Each pattern has exactly one capturing group, so findall yields
            # the captured answer strings in order of appearance.
            candidates = pattern.findall(response)
            if candidates:
                return candidates[-1].strip()
        return None

    def compute_score(self, response: str, golden: dict, golden_key: str = "answer") -> float:
        """Score *response* against the golden answer.

        Supports tagged or boxed answers and uses the final answer found in
        the response string.

        Args:
            response: Raw model output to score.
            golden: Mapping that holds the reference answer.
            golden_key: Key of the reference answer inside *golden*.

        Returns:
            1.0 if the answer is verified as correct, ``self.format_reward``
            if it was extracted and parsed but is not correct, and
            ``NEGATIVE_REWARD`` if no answer could be extracted or parsing
            raised ``ValueError``.

        Raises:
            KeyError: If *golden_key* is missing from *golden*.
        """
        final_answer = self._extract_final_answer(response)
        if final_answer is None:
            # Did not format the answer correctly
            return NEGATIVE_REWARD

        try:
            parsed_answer = parse(final_answer)
        except ValueError:
            print("Failed to parse the answer.")
            # print_exc reports the parse error being handled; print_stack
            # would only show where this handler was called from.
            traceback.print_exc()
            return NEGATIVE_REWARD

        # NOTE(review): the golden answer is passed as a plain string while
        # the response side is parsed — confirm against the math_verify docs
        # that this comparison is the intended one.
        if verify(str(golden[golden_key]), parsed_answer):
            return 1.0

        # Formatting is correct but the answer is incorrect
        return self.format_reward

    def make_prefix(self, problem_key: str = "problem", **kwargs) -> str:
        """Build the prompt for one math problem.

        Supports requesting tagged or boxed answers, and chat-mode prompts.

        Args:
            problem_key: Name of the keyword argument holding the problem text.
            **kwargs: Must contain *problem_key*.

        Returns:
            In chat mode, the problem followed by the answer-format
            instruction; otherwise a User/Assistant preamble that ends with an
            opening ``<think>`` tag.

        Raises:
            ValueError: If ``self.answer_format`` is neither ``"boxed"`` nor
                ``"tagged"``.
            KeyError: If *problem_key* is missing from *kwargs*.
        """
        if self.answer_format == "boxed":
            answer_format = "Please reason step by step and provide your answer between \\boxed{} tags, for example \\boxed{20\\sqrt{3}}."
        elif self.answer_format == "tagged":
            answer_format = "Please reason step by step and provide your answer between <answer> </answer> tags, for example <answer> 20\\sqrt{3} </answer>. Do not include an = sign."
        else:
            raise ValueError(f"Invalid answer format: {self.answer_format}")

        # ``chat_mode`` is not set in this class; presumably it comes from
        # PassAtEvaluationAgent — verify against the base class.
        if self.chat_mode:
            return f"""{kwargs[problem_key]}\n{answer_format}"""

        # NOTE(review): the copy of this file under review had lost all
        # indentation, so the prompt lines are emitted without leading
        # whitespace — confirm against the canonical prompt text.
        return (
            "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.\n"
            "The question will be a word math problem. Show your work in <think> </think> tags.\n"
            f"{answer_format}\n"
            f"User: {kwargs[problem_key]}\n"
            "Assistant: Let me solve this step by step.\n"
            "<think>"
        )