import re
import traceback

from megatron.rl.agent.reward_only_agent import PassAtEvaluationAgent

try:
    from math_verify import parse, verify
except ImportError:
    print(
        "math_verify is not installed. Install it using `pip install math-verify`. Continuing using exact match verification."
    )
    MATHVERIFY_AVAILABLE = False
else:
    print("math_verify is installed. Using math_verify to verify answers.")
    MATHVERIFY_AVAILABLE = True

# NOTE: despite the "Continuing using exact match verification" message above,
# there is no fallback path in this module; the assert below makes
# math_verify a hard requirement at import time.
assert (
    MATHVERIFY_AVAILABLE
), "math_verify is not installed but now required. Install it using `pip install math-verify` to continue."

# Score returned when no answer can be extracted or parsing raises.
NEGATIVE_REWARD = 0.0

# Answer-extraction patterns, tried in order. The first pattern that matches
# anywhere in the response wins, so <answer> tags take precedence over
# \boxed{}. The boxed pattern tolerates up to two levels of nested braces
# (e.g. \boxed{\frac{1}{\sqrt{2}}}). Both patterns have exactly one capturing
# group, which holds the answer text.
_ANSWER_PATTERNS = (
    re.compile(r"<answer>(.*?)</answer>", re.DOTALL),
    re.compile(r"\\boxed\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}", re.DOTALL),
)


def _extract_final_answer(response: str):
    """Return the last answer found in *response*, stripped, or None if none is found."""
    for pattern in _ANSWER_PATTERNS:
        candidates = pattern.findall(response)
        if candidates:
            # Use the final occurrence: earlier ones may be intermediate
            # results or echoes of the example in the prompt.
            return candidates[-1].strip()
    return None


class MathAgent(PassAtEvaluationAgent):
    """Agent that builds math prompts and scores responses with math_verify."""

    def __init__(self, format_reward: float = 0.0, answer_format: str = "tagged", **kwargs):
        """Configure the agent.

        Args:
            format_reward: Score returned when an answer was extracted and
                parsed but did not verify against the golden answer.
            answer_format: Either "tagged" or "boxed"; selects the
                instruction emitted by `make_prefix`.
            **kwargs: Forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)
        assert answer_format in ["tagged", "boxed"], "Invalid answer format"
        self.format_reward = format_reward
        self.answer_format = answer_format

    def compute_score(self, response: str, golden: dict, golden_key: str = "answer") -> float:
        """Take a response and a golden answer and return a score.

        Supports tagged or boxed answers regardless of the configured
        `answer_format`. Uses the final answer in the response string to
        compute the score.

        Args:
            response: Raw model output to grade.
            golden: Record holding the reference answer.
            golden_key: Key in `golden` under which the reference is stored.

        Returns:
            1.0 if the answer verifies, `self.format_reward` if an answer was
            extracted and parsed but is wrong, and `NEGATIVE_REWARD` if no
            answer could be extracted or parsing raised `ValueError`.
        """
        # Allow <answer> tags or \boxed{} tags (this is a bit of cheating in
        # favor of deepseek distilled models I think)
        final_answer = _extract_final_answer(response)
        if final_answer is None:
            # Did not format the answer correctly
            return NEGATIVE_REWARD

        try:
            parsed_answer = parse(final_answer)
        except ValueError:
            print("Failed to parse the answer.")
            # Print the exception that was just caught, not merely the
            # current call stack.
            traceback.print_exc()
            return NEGATIVE_REWARD

        if verify(str(golden[golden_key]), parsed_answer):
            return 1.0
        # Formatting is correct but the answer is incorrect
        return self.format_reward

    def make_prefix(self, problem_key: str = "problem", **kwargs) -> str:
        """Take a string math problem and return the prompt.

        Supports requesting tagged or boxed answers. Supports chat mode
        prompts.

        Args:
            problem_key: Name of the keyword argument holding the problem text.
            **kwargs: Must contain `problem_key`.

        Returns:
            The prompt string. In chat mode this is the problem followed by
            the answer-format instruction; otherwise it is a full
            User/Assistant preamble.

        Raises:
            ValueError: If `self.answer_format` is not "tagged" or "boxed".
            KeyError: If `problem_key` is missing from `kwargs`.
        """
        if self.answer_format == "boxed":
            answer_format = "Please reason step by step and provide your answer between \\boxed{} tags, for example \\boxed{20\\sqrt{3}}."
        elif self.answer_format == "tagged":
            # The tag name here must stay in sync with _ANSWER_PATTERNS.
            answer_format = "Please reason step by step and provide your answer between <answer> </answer> tags, for example <answer> 20\\sqrt{3} </answer>. Do not include an = sign."
        else:
            raise ValueError(f"Invalid answer format: {self.answer_format}")

        # NOTE(review): `chat_mode` is not set in this class; presumably it is
        # provided by the parent class -- confirm.
        if self.chat_mode:
            prefix = f"""{kwargs[problem_key]}\n{answer_format}"""
        else:
            prefix = (
                "A conversation between User and Assistant. "
                "The user asks a question, and the Assistant solves it. "
                "The assistant first thinks about the reasoning process in the mind "
                "and then provides the user with the answer. "
                "The question will be a word math problem. "
                "Show your work in <think> </think> tags. "
                f"{answer_format} "
                f"User: {kwargs[problem_key]} "
                "Assistant: Let me solve this step by step. "
            )
        return prefix