Spaces:

mindchain
/

rlm-arithmetic-training

Runtime error

App Files Files Community

mindchain committed on Feb 17

Commit

786e916

verified ·

1 Parent(s): 22c3bb1

Upload train_arithmetic_v3.py with huggingface_hub

Browse files

Files changed (1) hide show

train_arithmetic_v3.py +34 -8

train_arithmetic_v3.py CHANGED Viewed

@@ -53,12 +53,42 @@ def generate_arithmetic_samples(n_samples):
     return samples
 # ============================================================================
-# REWARD FUNCTION
 # ============================================================================
 def reward_func(completions, prompts=None, **kwargs):
     """
-    Reward function for arithmetic.
     """
     # Try multiple column names for ground truth
     answers = None
@@ -81,12 +111,8 @@ def reward_func(completions, prompts=None, **kwargs):
         else:
             text = str(completion)
-        # Extract the last number
-        numbers = re.findall(r'-?\d+\.?\d*', text)
-        if numbers:
-            predicted = numbers[-1].strip()
-        else:
-            predicted = ""
         # Exact match reward
         is_correct = predicted == str(truth).strip()

     return samples
 # ============================================================================
+# REWARD FUNCTION (Improved)
 # ============================================================================
def extract_answer(text: str) -> str:
    """
    Extract the final numeric answer from model output.

    Priority:
    1. Last number inside the last $$...$$ LaTeX block
    2. Number directly following the first "Answer:" marker (case-insensitive)
    3. Last standalone number anywhere in the text (fallback)

    Args:
        text: Raw completion text produced by the model.

    Returns:
        The extracted number as a string (optionally signed, optionally with
        a fractional part), or "" when no number is found.
    """
    if not text:
        return ""

    # The fractional part is matched only when digits follow the dot, so a
    # sentence-ending period ("... is 42.") is not captured as "42." and a
    # correct answer still equals the ground-truth string exactly.
    number_pattern = r'-?\d+(?:\.\d+)?'

    # Try to find numbers in $$...$$ blocks first
    latex_blocks = re.findall(r'\$\$(.*?)\$\$', text, re.DOTALL)
    if latex_blocks:
        # Use the last LaTeX block; if it holds no number, fall through
        numbers = re.findall(number_pattern, latex_blocks[-1])
        if numbers:
            return numbers[-1].strip()

    # Try to find number after "Answer:" pattern
    answer_match = re.search(
        r'Answer:\s*(' + number_pattern + r')', text, re.IGNORECASE
    )
    if answer_match:
        return answer_match.group(1).strip()

    # Fallback: last number in text
    numbers = re.findall(number_pattern, text)
    if numbers:
        return numbers[-1].strip()
    return ""
 def reward_func(completions, prompts=None, **kwargs):
     """
+    Reward function for arithmetic with improved extraction.
     """
     # Try multiple column names for ground truth
     answers = None
         else:
             text = str(completion)
+        # Extract answer using improved method
+        predicted = extract_answer(text)
         # Exact match reward
         is_correct = predicted == str(truth).strip()