sdzt
/

forensics-grpo

Video-Text-to-Text

temporal-grounding

Model card Files Files and versions

forensics-grpo / code /src /open_r1 /evaluate.py

sdzt's picture

Add source code

33569f9 verified 29 days ago

History Blame Contribute Delete

2.45 kB

	# Copyright 2025 The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Custom evaluation tasks for LightEval."""

	from lighteval.metrics.dynamic_metrics import (
	ExprExtractionConfig,
	LatexExtractionConfig,
	multilingual_extractive_match_metric,
	)
	from lighteval.tasks.lighteval_task import LightevalTaskConfig
	from lighteval.tasks.requests import Doc
	from lighteval.utils.language import Language


	# Scoring metric shared by every task defined in this module.
	# NOTE(review): the exact meaning of `fallback_mode`, `precision` and
	# `aggregation_function` is defined by lighteval's
	# `multilingual_extractive_match_metric` -- confirm against its documentation.
	metric = multilingual_extractive_match_metric(
	    language=Language.ENGLISH,
	    # Gold targets use only the LaTeX extraction config; prediction targets
	    # use the expression config as well as the LaTeX one.
	    gold_extraction_target=(LatexExtractionConfig(),),
	    pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
	    fallback_mode="first_match",
	    precision=5,
	    aggregation_function=max,
	)


	def prompt_fn(line, task_name: str = None):
	    """Build the LightEval ``Doc`` for a single dataset row.

	    The row's ``"problem"`` entry is used as the query, and its ``"solution"``
	    entry is the only choice, marked as gold via ``gold_index=0``.

	    Assumes the model is either prompted to emit \\boxed{answer} or does so
	    automatically.

	    Args:
	        line: Mapping-like dataset row providing ``"problem"`` and ``"solution"``.
	        task_name: Task name forwarded unchanged to the ``Doc``.

	    Returns:
	        The ``Doc`` built from the row.
	    """
	    question = line["problem"]
	    reference = line["solution"]
	    return Doc(task_name=task_name, query=question, choices=[reference], gold_index=0)


	# Define tasks
	# AIME 2024 task, read from the "train" split of HuggingFaceH4/aime_2024.
	aime24 = LightevalTaskConfig(
	    # Task identity.
	    name="aime24",
	    suite=["custom"],
	    version=1,
	    # Dataset location and the splits used for evaluation.
	    hf_repo="HuggingFaceH4/aime_2024",
	    hf_subset="default",
	    hf_avail_splits=["train"],
	    evaluation_splits=["train"],
	    # No few-shot split or selection strategy is configured.
	    few_shots_split=None,
	    few_shots_select=None,
	    # Prompt construction, generation size and scoring.
	    # NOTE(review): generation_size presumably caps generated tokens -- confirm.
	    prompt_function=prompt_fn,
	    generation_size=32768,
	    metric=[metric],
	)
	# MATH-500 task, read from the "test" split of HuggingFaceH4/MATH-500.
	math_500 = LightevalTaskConfig(
	    # Task identity.
	    name="math_500",
	    suite=["custom"],
	    version=1,
	    # Dataset location and the splits used for evaluation.
	    hf_repo="HuggingFaceH4/MATH-500",
	    hf_subset="default",
	    hf_avail_splits=["test"],
	    evaluation_splits=["test"],
	    # No few-shot split or selection strategy is configured.
	    few_shots_split=None,
	    few_shots_select=None,
	    # Prompt construction, generation size and scoring.
	    # NOTE(review): generation_size presumably caps generated tokens -- confirm.
	    prompt_function=prompt_fn,
	    generation_size=32768,
	    metric=[metric],
	)

	# Add tasks to the table
	# NOTE(review): TASKS_TABLE is presumably the name LightEval looks up when it
	# loads a custom task module -- confirm against the LightEval docs.
	TASKS_TABLE = [aime24, math_500]

	# MODULE LOGIC
	if __name__ == "__main__":
	    # When run as a script, list the registered task names and their count.
	    # TASKS_TABLE holds LightevalTaskConfig objects rather than dicts, so the
	    # name is read as an attribute; subscripting with t["name"] would raise
	    # TypeError on these entries.
	    print([t.name for t in TASKS_TABLE])
	    print(len(TASKS_TABLE))