Varshithdharmajv committed on
Commit
b2df04b
·
verified ·
1 Parent(s): 693764f

Upload math_verify/tasks.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. math_verify/tasks.py +324 -0
math_verify/tasks.py ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from textwrap import dedent
3
+ from typing import Callable, Optional
4
+
5
+ import numpy as np
6
+ from lighteval.metrics.dynamic_metrics import SampleLevelMetric
7
+ from lighteval.metrics.utils.metric_utils import MetricCategory, MetricUseCase
8
+ from lighteval.tasks.lighteval_task import LightevalTaskConfig
9
+ from lighteval.tasks.requests import Doc
10
+
11
+ from math_verify.few_shots import GSM8K_FEW_SHOTS, MATH_HARD_FEW_SHOTS
12
+ from math_verify.metric import math_metric
13
+ from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def as_lighteval_metric(
    metric: Callable[
        [list[str], list[str]], tuple[float, Optional[tuple[list[str], list[str]]]]
    ],
) -> SampleLevelMetric:
    """Adapt a math-verify scoring callable into a lighteval SampleLevelMetric.

    The wrapped callable receives ``(golds, predictions)`` and returns a score
    plus, optionally, the extracted gold/prediction strings. When extractions
    are present they are stored under ``formatted_doc.specific["extracted_predictions"]``
    so they can be inspected after evaluation.
    """

    def _score(formatted_doc: Doc, golds: list[str], predictions: list[str]) -> float:
        score, extracted = metric(golds, predictions)
        if extracted is not None:
            # Lazily create the per-doc scratch dict before stashing extractions.
            if not formatted_doc.specific:
                formatted_doc.specific = {}
            formatted_doc.specific["extracted_predictions"] = extracted
        return score

    return SampleLevelMetric(
        metric_name="extractive_match",
        sample_level_fn=_score,
        category=MetricCategory.GENERATIVE,
        use_case=MetricUseCase.ACCURACY,
        corpus_level_fn=np.mean,
        higher_is_better=True,
    )
41
+
42
+
43
def math_hard_prompt_function(x: dict, task_name: str) -> Doc:
    """Format a MATH-Hard row as a "Question / Step-by-Step Answer" prompt.

    Few-shot rows (flagged by ``__few_shots``) are drawn from the curated
    MATH_HARD_FEW_SHOTS pool, reusing the last example if ``__index`` runs
    past the end; real rows use the dataset's ``problem``/``solution`` fields.
    """
    if x.get("__few_shots"):
        # Clamp so an out-of-range index falls back to the last curated shot.
        shot = MATH_HARD_FEW_SHOTS[min(x["__index"], len(MATH_HARD_FEW_SHOTS) - 1)]
        question, answer = shot["question"], shot["answer"]
    else:
        question = x["problem"]
        answer = str(x["solution"])

    query = dedent(
        f"""\
        Question: {question}
        Step-by-Step Answer:\
        """
    ).strip()

    return Doc(query=query, choices=[answer], gold_index=0)
66
+
67
+
68
def math_prompt_function(x: dict, task_name: str) -> Doc:
    """Format a MATH-style row (``problem``/``answer`` fields) as a prompt Doc.

    Few-shot examples come from the shared MATH_HARD_FEW_SHOTS pool, with the
    index clamped to the last available example.
    """
    if x.get("__few_shots"):
        # Reuse the final curated shot when the requested index overflows.
        shot = MATH_HARD_FEW_SHOTS[min(x["__index"], len(MATH_HARD_FEW_SHOTS) - 1)]
        question, answer = shot["question"], shot["answer"]
    else:
        question = x["problem"]
        answer = str(x["answer"])

    query = dedent(
        f"""\
        Question: {question}
        Step-by-Step Answer:\
        """
    ).strip()

    return Doc(query=query, choices=[answer], gold_index=0)
91
+
92
+
93
def math_aime24_prompt_function(x: dict, task_name: str) -> Doc:
    """Format an AIME-24 row as a prompt Doc.

    The gold choice is the dataset's full ``reference_solution`` (prefixed
    with a single space); few-shot examples reuse MATH_HARD_FEW_SHOTS with
    the index clamped to the pool size.
    """
    if x.get("__few_shots"):
        # Clamp the few-shot index so overflow reuses the last example.
        shot = MATH_HARD_FEW_SHOTS[min(x["__index"], len(MATH_HARD_FEW_SHOTS) - 1)]
        question, answer = shot["question"], shot["answer"]
    else:
        question = x["problem"]
        answer = str(x["reference_solution"])

    query = dedent(
        f"""\
        Question: {question}
        Step-by-Step Answer:\
        """
    ).strip()

    return Doc(query=query, choices=[f" {answer}"], gold_index=0)
116
+
117
+
118
def math_amc23_prompt_function(x: dict, task_name: str) -> Doc:
    """Format an AMC-23 row (``question``/``answer`` fields) as a prompt Doc.

    Few-shot examples come from MATH_HARD_FEW_SHOTS, clamped to the last
    entry; the gold choice carries a leading space.
    """
    if x.get("__few_shots"):
        # Out-of-range few-shot indices fall back to the last curated shot.
        shot = MATH_HARD_FEW_SHOTS[min(x["__index"], len(MATH_HARD_FEW_SHOTS) - 1)]
        question, answer = shot["question"], shot["answer"]
    else:
        question = x["question"]
        answer = str(x["answer"])

    query = dedent(
        f"""\
        Question: {question}
        Step-by-Step Answer:\
        """
    ).strip()

    return Doc(query=query, choices=[f" {answer}"], gold_index=0)
140
+
141
+
142
def gsm8k_prompt_function(x: dict, task_name: str) -> Doc:
    """Format a GSM8K row as a prompt Doc.

    Few-shot examples come from GSM8K_FEW_SHOTS (index clamped to the pool);
    for real rows the gold is the final answer after GSM8K's ``####`` marker.
    """
    if x.get("__few_shots"):
        # Clamp so an overflowing index reuses the last curated example.
        shot = GSM8K_FEW_SHOTS[min(x["__index"], len(GSM8K_FEW_SHOTS) - 1)]
        question, answer = shot["question"], shot["answer"]
    else:
        question = x["question"]
        # GSM8K solutions end with "#### <final answer>"; keep only that part.
        answer = x["answer"].split("####")[-1].strip()

    query = dedent(
        f"""\
        Question: {question}
        Step-by-Step Answer:\
        """
    ).strip()

    return Doc(query=query, choices=[f" {answer}"], gold_index=0)
165
+
166
+
167
# The seven topic subsets published under lighteval/MATH-Hard.
_MATH_HARD_SUBSETS = (
    "algebra",
    "counting_and_probability",
    "geometry",
    "intermediate_algebra",
    "number_theory",
    "prealgebra",
    "precalculus",
)

# One lighteval task per MATH-Hard subset. Gold answers are extracted from
# LaTeX with boxed expressions tried first; predictions are parsed as LaTeX
# or plain expressions.
math_hard_lighteval = [
    LightevalTaskConfig(
        name=f"math_hard:{subset}",
        suite=["lighteval", "math"],
        prompt_function=math_hard_prompt_function,
        hf_repo="lighteval/MATH-Hard",
        hf_subset=subset,
        evaluation_splits=["test"],
        few_shots_split="train",
        generation_size=1024,
        stop_sequence=["\nQuestion:", "\nProblem:", "\nquestion:", "\nproblem:"],
        trust_dataset=True,
        version=0,
        metric=[
            as_lighteval_metric(
                math_metric(
                    gold_extraction_target=(
                        LatexExtractionConfig(boxed_match_priority=0),
                    ),
                    pred_extraction_target=(
                        LatexExtractionConfig(),
                        ExprExtractionConfig(),
                    ),
                )
            ),
        ],
    )
    for subset in _MATH_HARD_SUBSETS
]
204
+
205
# MATH-500 (HuggingFaceH4/MATH-500): single "default" subset evaluated on the
# test split. Gold answers are parsed as LaTeX with boxed expressions tried
# first; predictions are parsed as LaTeX or plain expressions.
# NOTE(review): few_shots_split is the eval split itself — confirm intended.
math_500_lighteval = [
    LightevalTaskConfig(
        name="math_500",
        suite=["lighteval", "math"],
        prompt_function=math_prompt_function,
        hf_repo="HuggingFaceH4/MATH-500",
        hf_subset="default",
        evaluation_splits=["test"],
        few_shots_split="test",
        generation_size=1024,
        metric=[
            as_lighteval_metric(
                math_metric(
                    gold_extraction_target=(
                        LatexExtractionConfig(boxed_match_priority=0),
                    ),
                    pred_extraction_target=(
                        LatexExtractionConfig(),
                        ExprExtractionConfig(),
                    ),
                )
            ),
        ],
        stop_sequence=["\nQuestion:", "\nProblem:", "\nquestion:", "\nproblem:"],
        trust_dataset=True,
        version=0,
    )
]
233
+
234
+
235
# AIME 2024 (zwhe99/aime24): gold answers come from the dataset's full
# reference solutions, parsed as LaTeX (no boxed priority here, unlike the
# MATH configs); predictions are parsed as LaTeX or plain expressions.
aime24_lighteval = [
    LightevalTaskConfig(
        name="aime24",
        suite=["lighteval", "math"],
        prompt_function=math_aime24_prompt_function,
        hf_repo="zwhe99/aime24",
        hf_subset="default",
        evaluation_splits=["test"],
        few_shots_split="test",
        generation_size=1024,
        metric=[
            as_lighteval_metric(
                math_metric(
                    gold_extraction_target=(LatexExtractionConfig(),),
                    pred_extraction_target=(
                        LatexExtractionConfig(),
                        ExprExtractionConfig(),
                    ),
                )
            ),
        ],
        stop_sequence=["\nQuestion:", "\nProblem:", "\nquestion:", "\nproblem:"],
        trust_dataset=True,
        version=0,
    )
]
261
+
262
# AMC 2023 (zwhe99/amc23): golds are plain numeric/expression answers, so the
# gold side uses expression extraction only. Rows with an empty question are
# filtered out via hf_filter.
amc23_lighteval = [
    LightevalTaskConfig(
        name="amc23",
        suite=["lighteval", "math"],
        prompt_function=math_amc23_prompt_function,
        hf_repo="zwhe99/amc23",
        hf_subset="default",
        hf_filter=lambda x: len(x["question"].strip()) > 0,
        evaluation_splits=["test"],
        few_shots_split="test",
        generation_size=1024,
        metric=[
            as_lighteval_metric(
                math_metric(
                    gold_extraction_target=(ExprExtractionConfig(),),
                    pred_extraction_target=(
                        LatexExtractionConfig(),
                        ExprExtractionConfig(),
                    ),
                )
            ),
        ],
        stop_sequence=["\nQuestion:", "\nProblem:", "\nquestion:", "\nproblem:"],
        trust_dataset=True,
        version=0,
    )
]
289
+
290
# GSM8K (openai/gsm8k, "main" subset): golds are the plain numbers after the
# "####" marker, so the gold side uses expression extraction only, with
# first_match fallback when no prediction parses cleanly.
# NOTE(review): unlike the other configs, trust_dataset and version are not
# set here — confirm whether that is intentional.
gsm8k_lighteval = [
    LightevalTaskConfig(
        name="gsm8k",
        suite=["lighteval", "math"],
        prompt_function=gsm8k_prompt_function,
        hf_repo="openai/gsm8k",
        hf_subset="main",
        hf_filter=lambda x: len(x["question"].strip()) > 0,
        evaluation_splits=["test"],
        few_shots_split="test",
        generation_size=1024,
        stop_sequence=["\nQuestion:", "\nProblem:", "\nquestion:", "\nproblem:"],
        metric=[
            as_lighteval_metric(
                math_metric(
                    gold_extraction_target=(ExprExtractionConfig(),),
                    pred_extraction_target=(
                        LatexExtractionConfig(),
                        ExprExtractionConfig(),
                    ),
                    fallback_mode="first_match",
                )
            ),
        ],
    )
]
316
+
317
+
318
# Flat registry of every task config defined above; lighteval's custom-task
# loader discovers tasks through this module-level name.
TASKS_TABLE = (
    gsm8k_lighteval
    + math_hard_lighteval
    + math_500_lighteval
    + aime24_lighteval
    + amc23_lighteval
)