|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import re |
|
|
from typing import Dict, List |
|
|
|
|
|
from mathruler.grader import extract_boxed_content, grade_answer |
|
|
|
|
|
|
|
|
# Compiled once at import time: this check runs for every scored prediction,
# so the pattern should not be rebuilt (or re-fetched from re's cache) on
# each call. DOTALL lets "." cross newlines inside and between the sections.
_FORMAT_PATTERN = re.compile(r"<think>.*</think>.*\\boxed\{.*\}.*", re.DOTALL)


def format_reward(predict: str) -> float:
    r"""Return 1.0 if *predict* follows the expected response layout, else 0.0.

    The entire string must match, which means it has to:

    * begin with ``<think>`` (nothing, not even whitespace, may precede it),
    * contain a closing ``</think>`` somewhere after that,
    * contain ``\boxed{`` after the closing tag, followed later by a ``}``.

    Arbitrary text, including newlines, may sit between those pieces and
    after the final ``}``. Only the layout is checked; the boxed content
    itself is not inspected.

    Args:
        predict: The response text to check.

    Returns:
        ``1.0`` when the layout matches, ``0.0`` otherwise.
    """
    return 1.0 if _FORMAT_PATTERN.fullmatch(predict) else 0.0
|
|
|
|
|
|
|
|
def accuracy_reward(predict: str, ground_truth: str) -> float:
    """Score whether the boxed answer in *predict* agrees with *ground_truth*.

    The answer is pulled out of *predict* with ``extract_boxed_content`` and
    handed, together with *ground_truth*, to ``grade_answer``; both helpers
    come from ``mathruler.grader``.

    Args:
        predict: The response text expected to carry a boxed answer.
        ground_truth: The reference answer to grade against.

    Returns:
        ``1.0`` when ``grade_answer`` returns a truthy value, ``0.0`` otherwise.
    """
    boxed_answer = extract_boxed_content(predict)
    if grade_answer(boxed_answer, ground_truth):
        return 1.0
    return 0.0
|
|
|
|
|
|
|
|
# Removes whitespace on either side of "<", ">" and "/" while keeping the
# character itself. Compiled once here rather than once per prediction.
_TAG_WHITESPACE_PATTERN = re.compile(r"\s*(<|>|/)\s*")


def compute_score(predicts: List[str], ground_truths: List[str], format_weight: float = 0.1) -> List[Dict[str, float]]:
    """Score each prediction against its ground truth.

    Every prediction is first normalised by stripping whitespace around
    ``<``, ``>`` and ``/`` (presumably so that loosely spaced tags such as
    ``< think >`` still pass the format check -- confirm against the models
    in use). Note that this normalisation applies to the whole text, not
    only to tags. The normalised text is then passed to ``format_reward``
    and ``accuracy_reward``.

    Args:
        predicts: Response texts, one per sample.
        ground_truths: Reference answers, aligned index-by-index with
            *predicts*.
        format_weight: Share of the overall score given to the format
            check; the accuracy check receives ``1 - format_weight``.

    Returns:
        One dict per sample, in input order, with the keys ``"overall"``
        (the weighted blend), ``"format"`` and ``"accuracy"``.

    Raises:
        ValueError: If *predicts* and *ground_truths* differ in length.
    """
    # zip() would silently stop at the shorter list, leaving some samples
    # without a score and misaligning rewards with their predictions.
    if len(predicts) != len(ground_truths):
        raise ValueError(
            f"predicts and ground_truths must have the same length, "
            f"got {len(predicts)} and {len(ground_truths)}"
        )

    scores = []
    for predict, ground_truth in zip(predicts, ground_truths):
        normalized = _TAG_WHITESPACE_PATTERN.sub(r"\1", predict)
        format_score = format_reward(normalized)
        accuracy_score = accuracy_reward(normalized, ground_truth)
        scores.append(
            {
                "overall": (1 - format_weight) * accuracy_score + format_weight * format_score,
                "format": format_score,
                "accuracy": accuracy_score,
            }
        )

    return scores
|
|
|