| | """Preprocessing functions for various benchmark datasets. |
| | |
| | This module provides data loading and prompt formatting functions for: |
| | - Math benchmarks: MATH, GSM8K, AIME, Minerva Math, OmniMath, etc. |
| | - Coding benchmarks: HumanEval, LiveCodeBench, MBPP |
| | - Multiple-choice: MMLU, MMLU Pro, GPQA |
| | - Instruction following: IFEval, IFBench, MT-Bench |
| | - General: AlpacaEval, Arena-Hard |
| | """ |
| |
|
| | import json |
| | import pandas |
| | import os |
| |
|
| |
|
def preprocess_gpqa_chatml_template(data_file, use_r1=False, think=True):
    """Build GPQA multiple-choice prompts wrapped in a chat template.

    Args:
        data_file: Path to the GPQA JSON file (list of dicts with
            'question' and 'choice_A'..'choice_D' keys).
        use_r1: If True, emit DeepSeek R1-style prompts instead of ChatML.
        think: If True (and not use_r1), append " /think" and pre-open a
            <think> block; otherwise append " /no_think".

    Returns:
        list: One formatted prompt string per question.
    """
    if use_r1:
        template = "{Question}\n\n\nA. {choice1}\nB. {choice2}\nC. {choice3}\nD. {choice4}\n\nPlease reason step-by-step and put your choice letter without any other text with \\boxed{{}} in the end. Let's think step by step and output the final answer within \\boxed{{}}."
    else:
        template = "Return your final response within \\boxed{{}} and only include the letter choice (e.g., A, B, C, or D) as your final response.\n\n{Question}\n\nAnswer Choices:\n(A) {choice1}\n(B) {choice2}\n(C) {choice3}\n(D) {choice4}"
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant.<|im_end|>\n"

    with open(data_file, "r") as fp:
        records = json.load(fp)

    prompts = []
    for rec in records:
        question_block = template.format(
            Question=rec['question'].strip(),
            choice1=rec['choice_A'].strip(),
            choice2=rec['choice_B'].strip(),
            choice3=rec['choice_C'].strip(),
            choice4=rec['choice_D'].strip(),
        )
        if use_r1:
            prompts.append("<|begin▁of▁sentence|><|User|>" + question_block + ".<|Assistant|><think>\n")
        elif think:
            prompts.append(system_block + "<|im_start|>user\n" + question_block + " /think<|im_end|>\n<|im_start|>assistant\n<think>\n")
        else:
            prompts.append(system_block + "<|im_start|>user\n" + question_block + " /no_think<|im_end|>\n<|im_start|>assistant\n")
    return prompts
| |
|
| |
|
def preprocess_gpqa_raw_template(data_file, use_r1=False, think=True):
    """Build GPQA multiple-choice prompts without any chat template.

    Args:
        data_file: Path to the GPQA JSON file (list of dicts with
            'question' and 'choice_A'..'choice_D' keys).
        use_r1: If True, use the DeepSeek R1-style question template.
        think: Accepted for signature parity with the ChatML variant;
            not used in this raw formatting path.

    Returns:
        list: One formatted question string per record.
    """
    if use_r1:
        template = "{Question}\n\n\nA. {choice1}\nB. {choice2}\nC. {choice3}\nD. {choice4}\n\nPlease reason step-by-step and put your choice letter without any other text with \\boxed{{}} in the end. Let's think step by step and output the final answer within \\boxed{{}}."
    else:
        template = "Return your final response within \\boxed{{}} and only include the letter choice (e.g., A, B, C, or D) as your final response.\n\n{Question}\n\nAnswer Choices:\n(A) {choice1}\n(B) {choice2}\n(C) {choice3}\n(D) {choice4}"

    with open(data_file, "r") as fp:
        records = json.load(fp)

    return [
        template.format(
            Question=rec['question'].strip(),
            choice1=rec['choice_A'].strip(),
            choice2=rec['choice_B'].strip(),
            choice3=rec['choice_C'].strip(),
            choice4=rec['choice_D'].strip(),
        )
        for rec in records
    ]
| |
|
| |
|
| |
|
| |
|
def preprocess_gsm8k_zeroshot_chatml_template(data_file):
    """Build zero-shot GSM8K prompts wrapped in a ChatML template.

    Args:
        data_file: Path to the GSM8K JSON file (list of dicts with a
            'question' key).

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"

    with open(data_file, "r") as fp:
        records = json.load(fp)

    return [
        system_block + "<|im_start|>user\n" + rec['question'].strip() + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
        for rec in records
    ]
| |
|
def preprocess_gsm8k_zeroshot_raw(data_file):
    """Return GSM8K questions as plain zero-shot prompts.

    Args:
        data_file: Path to the GSM8K JSON file (list of dicts with a
            'question' key).

    Returns:
        list: The stripped question strings, one per example.
    """
    with open(data_file, "r") as fp:
        records = json.load(fp)
    return [rec['question'].strip() for rec in records]
| |
|
| |
|
def preprocess_humaneval_raw(data_file):
    """Build HumanEval code-completion prompts.

    Args:
        data_file: Path to the HumanEval JSON file mapping task_id ->
            dict with a 'prompt' key (function signature + docstring).

    Returns:
        tuple: (prompt_list, qid_list) where prompt_list holds the
        instruction-prefixed completion prompts and qid_list the task ids,
        in matching order.
    """
    instruction = "Read the following function signature and docstring, and fully implement the function described. Your response should only contain the code for this function.\n"
    with open(data_file, "r") as fp:
        task_dict = json.load(fp)
    qid_list = list(task_dict.keys())
    prompt_list = [instruction + spec['prompt'] for spec in task_dict.values()]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_math_zeroshot_chatml_template(data_file):
    """Build zero-shot MATH prompts wrapped in a ChatML template.

    Args:
        data_file: Path to the MATH CSV file with a 'Question' column.

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    frame = pandas.read_csv(data_file)
    prompts = []
    for _, row in frame.iterrows():
        question = row['Question'].strip()
        prompts.append(system_block + "<|im_start|>user\n" + question + "<|im_end|>\n<|im_start|>assistant\n<think>\n")
    return prompts
| |
|
| |
|
def preprocess_math500_zeroshot_chatml_template(data_file, use_r1=False):
    """Preprocess MATH-500 dataset with zero-shot prompting.

    Args:
        data_file: Path to MATH-500 JSONL file (one JSON object per line
            with a 'problem' key).
        use_r1: Whether to use DeepSeek R1-style prompting (default: False).

    Returns:
        list: Formatted prompts with a \\boxed{} final-answer instruction.
    """
    instruction = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"

    prompt_list = []
    with open(data_file, "r") as f:
        for line in f:
            data_dict = json.loads(line)
            final_question = data_dict['problem'].strip()
            if use_r1:
                # BUGFIX: the original wrote "\boxed" in a non-raw string, which
                # emitted a literal backspace character (\x08) into the prompt.
                # "\\boxed{{}}" renders the intended LaTeX macro, matching every
                # other template in this module.
                final_prompt = "<|begin▁of▁sentence|><|User|>{question}\nPlease reason step by step, and put your final answer within \\boxed{{}}.<|Assistant|><think>\n".format(question=final_question)
            else:
                final_prompt = instruction + "<|im_start|>user\n" + final_question + "\n\nPlease place your final answer inside \\boxed{}." + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
            prompt_list.append(final_prompt)

    return prompt_list
| |
|
| |
|
def preprocess_minerva_math_chatml_template(data_file):
    """Build ChatML prompts for the Minerva Math benchmark.

    Args:
        data_file: Path to the Minerva Math JSONL file (one JSON object
            per line with a 'problem' key).

    Returns:
        list: ChatML prompts asking for a \\boxed{} final answer, with
        thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    prompts = []
    with open(data_file, "r") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['problem'].strip()
            prompts.append(
                system_block
                + "<|im_start|>user\n"
                + question
                + "\n\nPlease place your final answer inside \\boxed{}."
                + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
            )
    return prompts
| |
|
| |
|
def preprocess_gaokao2023en_chatml_template(data_file):
    """Build ChatML prompts for the Gaokao 2023 English benchmark.

    Args:
        data_file: Path to the Gaokao 2023 English JSONL file (one JSON
            object per line with a 'question' key; UTF-8 encoded).

    Returns:
        list: ChatML prompts asking for a \\boxed{} final answer, with
        thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    prompts = []
    with open(data_file, "r", encoding="utf-8") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['question'].strip()
            prompts.append(
                system_block
                + "<|im_start|>user\n"
                + question
                + "\n\nPlease place your final answer inside \\boxed{}."
                + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
            )
    return prompts
| |
|
| |
|
def preprocess_olympiadbench_chatml_template(data_file):
    """Build ChatML prompts for the OlympiadBench benchmark.

    Args:
        data_file: Path to the OlympiadBench JSONL file (one JSON object
            per line with a 'question' key; UTF-8 encoded).

    Returns:
        list: ChatML prompts asking for a \\boxed{} final answer, with
        thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    with open(data_file, "r", encoding="utf-8") as fp:
        questions = [json.loads(raw)['question'].strip() for raw in fp]
    return [
        system_block
        + "<|im_start|>user\n"
        + q
        + "\n\nPlease place your final answer inside \\boxed{}."
        + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
        for q in questions
    ]
| |
|
| |
|
def preprocess_collegemath_chatml_template(data_file):
    """Build ChatML prompts for the College Math benchmark.

    Args:
        data_file: Path to the College Math JSONL file (one JSON object
            per line with a 'question' key).

    Returns:
        list: ChatML prompts asking for a \\boxed{} final answer, with
        thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    prompts = []
    with open(data_file, "r") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['question'].strip()
            prompts.append(
                system_block
                + "<|im_start|>user\n"
                + question
                + "\n\nPlease place your final answer inside \\boxed{}."
                + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
            )
    return prompts
| |
|
| |
|
def preprocess_aime24_chatml_template(data_file):
    """Build ChatML prompts for AIME 2024.

    Args:
        data_file: Path to the AIME 2024 JSONL file (one JSON object per
            line with a 'question' key).

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    prompts = []
    with open(data_file, "r") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['question'].strip()
            prompts.append(system_block + "<|im_start|>user\n" + question + "<|im_end|>\n<|im_start|>assistant\n<think>\n")
    return prompts
| |
|
| |
|
def preprocess_aime25_chatml_template(data_file):
    """Build ChatML prompts for AIME 2025.

    Args:
        data_file: Path to the AIME 2025 JSONL file (one JSON object per
            line with a 'problem' key).

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    with open(data_file, "r") as fp:
        problems = [json.loads(raw)['problem'].strip() for raw in fp]
    return [
        system_block + "<|im_start|>user\n" + q + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
        for q in problems
    ]
| |
|
| |
|
| |
|
def preprocess_aime24_raw(data_file):
    """Build raw AIME 2024 prompts with a boxed-answer instruction.

    Args:
        data_file: Path to the AIME 2024 JSONL file (one JSON object per
            line with a 'question' key).

    Returns:
        list: Plain prompts ending with the \\boxed{} instruction.
    """
    suffix = "\nPlease reason step by step, and put your final answer within \\boxed{}."
    prompts = []
    with open(data_file, "r") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['question'].strip()
            prompts.append(question + suffix)
    return prompts
| |
|
| |
|
def preprocess_aime25_raw(data_file):
    """Build raw AIME 2025 prompts with a boxed-answer instruction.

    Args:
        data_file: Path to the AIME 2025 JSONL file (one JSON object per
            line with a 'problem' key).

    Returns:
        list: Plain prompts ending with the \\boxed{} instruction.
    """
    suffix = "\nPlease reason step by step, and put your final answer within \\boxed{}."
    with open(data_file, "r") as fp:
        return [json.loads(raw)['problem'].strip() + suffix for raw in fp]
| |
|
| |
|
def preprocess_amc23_chatml_template(data_file):
    """Build ChatML prompts for AMC 2023.

    Args:
        data_file: Path to the AMC 2023 JSONL file (one JSON object per
            line with a 'question' key).

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    prompts = []
    with open(data_file, "r") as fp:
        for raw_line in fp:
            question = json.loads(raw_line)['question'].strip()
            prompts.append(system_block + "<|im_start|>user\n" + question + "<|im_end|>\n<|im_start|>assistant\n<think>\n")
    return prompts
| |
|
| |
|
def preprocess_omnimath_chatml_template(data_file):
    """Build ChatML prompts for the OmniMath benchmark.

    Args:
        data_file: Path to the OmniMath JSONL file (one JSON object per
            line with a 'problem' key).

    Returns:
        list: ChatML prompts with thinking mode pre-opened.
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"
    with open(data_file, "r") as fp:
        problems = [json.loads(raw)['problem'].strip() for raw in fp]
    return [
        system_block + "<|im_start|>user\n" + q + "<|im_end|>\n<|im_start|>assistant\n<think>\n"
        for q in problems
    ]
| |
|
| |
|
def preprocess_ifeval_chatml_template(data_file):
    """Build ChatML prompts for the IFEval instruction-following benchmark.

    Args:
        data_file: Path to the IFEval JSONL file (records with 'key'
            and 'prompt' fields).

    Returns:
        tuple: (prompt_list, qid_list) — ChatML-wrapped prompts and the
        matching task keys, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['key'] for rec in records]
    prompt_list = [
        "<|im_start|>user\n" + rec['prompt'] + "<|im_end|>\n<|im_start|>assistant\n"
        for rec in records
    ]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_ifeval_raw(data_file):
    """Load IFEval prompts without any chat template.

    Args:
        data_file: Path to the IFEval JSONL file (records with 'key'
            and 'prompt' fields).

    Returns:
        tuple: (prompt_list, qid_list) — raw prompt strings and the
        matching task keys, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['key'] for rec in records]
    prompt_list = [rec['prompt'] for rec in records]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_ifbench_raw(data_file):
    """Load IFBench prompts without any chat template.

    Args:
        data_file: Path to the IFBench JSONL file (records with 'key'
            and 'prompt' fields).

    Returns:
        tuple: (prompt_list, qid_list) — raw prompt strings and the
        matching task keys, in file order.
    """
    qid_list = []
    prompt_list = []
    with open(data_file, "r") as fp:
        for line in fp:
            rec = json.loads(line)
            qid_list.append(rec['key'])
            prompt_list.append(rec['prompt'])
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_arena_hard_chatml_template(data_file):
    """Build ChatML prompts for the Arena-Hard benchmark (first turn only).

    Args:
        data_file: Path to the Arena-Hard JSONL file (records with
            'question_id' and 'turns' — a list of {'content': ...} dicts).

    Returns:
        tuple: (prompt_list, qid_list) — ChatML-wrapped first-turn prompts
        and the matching question ids, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['question_id'] for rec in records]
    prompt_list = [
        "<|im_start|>user\n" + rec['turns'][0]['content'] + "<|im_end|>\n<|im_start|>assistant\n"
        for rec in records
    ]
    return prompt_list, qid_list
| |
|
def preprocess_arena_hard_raw(data_file):
    """Load Arena-Hard first-turn questions without any chat template.

    Args:
        data_file: Path to the Arena-Hard JSONL file (records with
            'question_id' and 'turns' — a list of {'content': ...} dicts).

    Returns:
        tuple: (prompt_list, qid_list) — raw first-turn question strings
        and the matching question ids, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['question_id'] for rec in records]
    prompt_list = [rec['turns'][0]['content'] for rec in records]
    return prompt_list, qid_list
| |
|
def preprocess_arena_hard_v2_raw(data_file):
    """Load Arena-Hard v2.0 questions without any chat template.

    Args:
        data_file: Path to the Arena-Hard v2.0 JSONL file (records with
            'uid' and 'prompt' fields).

    Returns:
        tuple: (prompt_list, qid_list) — raw prompt strings and the
        matching unique ids, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['uid'] for rec in records]
    prompt_list = [rec['prompt'] for rec in records]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_alpaca_eval_raw(data_file):
    """Load AlpacaEval instructions without any chat template.

    Args:
        data_file: Path to the AlpacaEval JSON file (a list of dicts
            with an 'instruction' key).

    Returns:
        tuple: (prompt_list, qid_list) — raw instruction strings and
        sequential integer ids (0-based), in file order.
    """
    with open(data_file, "r") as fp:
        records = json.load(fp)

    qid_list = list(range(len(records)))
    prompt_list = [rec['instruction'] for rec in records]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_alpaca_eval_chatml_template(data_file):
    """Build ChatML prompts for the AlpacaEval benchmark.

    Args:
        data_file: Path to the AlpacaEval JSON file (a list of dicts
            with an 'instruction' key).

    Returns:
        tuple: (prompt_list, qid_list) — ChatML-wrapped instructions and
        sequential integer ids (0-based), in file order.
    """
    with open(data_file, "r") as fp:
        records = json.load(fp)

    qid_list = list(range(len(records)))
    prompt_list = [
        "<|im_start|>user\n" + rec['instruction'] + "<|im_end|>\n<|im_start|>assistant\n"
        for rec in records
    ]
    return prompt_list, qid_list
| |
|
| |
|
| |
|
def preprocess_mtbench_firstturn(data_file):
    """Build ChatML prompts for MT-Bench first turns.

    Args:
        data_file: Path to the MT-Bench JSONL file (records with
            'question_id' and 'turns' — a list of question strings).

    Returns:
        tuple: (prompt_list, qid_list) — ChatML-wrapped first-turn
        prompts and the matching question ids, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['question_id'] for rec in records]
    prompt_list = [
        "<|im_start|>user\n" + rec['turns'][0] + "<|im_end|>\n<|im_start|>assistant\n"
        for rec in records
    ]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mtbench_firstturn_raw(data_file):
    """Load MT-Bench first-turn questions without any chat template.

    Args:
        data_file: Path to the MT-Bench JSONL file (records with
            'question_id' and 'turns' — a list of question strings).

    Returns:
        tuple: (prompt_list, qid_list) — raw first-turn question strings
        and the matching question ids, in file order.
    """
    with open(data_file, "r") as fp:
        records = [json.loads(line) for line in fp]

    qid_list = [rec['question_id'] for rec in records]
    prompt_list = [rec['turns'][0] for rec in records]
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mtbench_secondturn(data_file, output_file):
    """Build ChatML prompts for MT-Bench second turns.

    Reconstructs the full first-turn exchange (user question + the
    model's recorded answer) and appends the second-turn question.

    Args:
        data_file: Path to the MT-Bench JSONL file (records with
            'question_id' and a two-element 'turns' list).
        output_file: JSONL file of first-turn model outputs (records
            with 'task_id' and 'output').

    Returns:
        tuple: (prompt_list, qid_list) — conversation-history prompts
        and the matching question ids, in file order.
    """
    with open(data_file, "r") as fp:
        questions = [json.loads(line) for line in fp]

    # Map each first-turn task id to the model's recorded response.
    first_turn_outputs = {}
    with open(output_file, "r") as fp:
        for line in fp:
            rec = json.loads(line)
            first_turn_outputs[rec['task_id']] = rec['output']

    qid_list = []
    prompt_list = []
    for question in questions:
        qid = question['question_id']
        turn1 = question['turns'][0]
        turn2 = question['turns'][1]
        reply1 = first_turn_outputs[qid]

        qid_list.append(qid)
        prompt_list.append(
            "<|im_start|>user\n" + turn1 + "<|im_end|>\n"
            "<|im_start|>assistant\n" + reply1 + "<|im_end|>\n"
            "<|im_start|>user\n" + turn2 + "<|im_end|>\n"
            "<|im_start|>assistant\n"
        )
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mtbench_secondturn_raw(data_file, output_file):
    """Build MT-Bench second-turn prompts as chat message lists.

    Args:
        data_file: Path to the MT-Bench JSONL file (records with
            'question_id' and a two-element 'turns' list).
        output_file: JSONL file of first-turn model outputs (records
            with 'task_id', 'output', and 'reason_text').

    Returns:
        tuple: (prompt_list, qid_list) — each prompt is a 3-message
        [user, assistant, user] chat list; qid_list holds the matching
        question ids, in file order.
    """
    with open(data_file, "r") as fp:
        questions = [json.loads(line) for line in fp]

    # task_id -> (first-turn answer, reasoning text). The reasoning text is
    # read to enforce the record schema but is not placed in the rebuilt chat.
    first_turn = {}
    with open(output_file, "r") as fp:
        for line in fp:
            rec = json.loads(line)
            first_turn[rec['task_id']] = (rec['output'], rec['reason_text'])

    qid_list = []
    prompt_list = []
    for question in questions:
        qid = question['question_id']
        answer, _reason = first_turn[qid]
        qid_list.append(qid)
        prompt_list.append([
            {'role': 'user', 'content': question['turns'][0]},
            {'role': 'assistant', 'content': answer},
            {'role': 'user', 'content': question['turns'][1]},
        ])
    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mmlu_chatml_template(data_file):
    """Build few-shot MMLU prompts wrapped in a ChatML template.

    Few-shot exemplars are loaded from the package-local
    ``flan_cot_fewshot`` YAML files, keyed by subject.

    Args:
        data_file: Path to the MMLU CSV file with columns
            'Subject', 'Question', 'A', 'B', 'C', 'D'.

    Returns:
        list: ChatML prompts, each prefixed with the subject's
        chain-of-thought few-shot exemplars.
    """
    def _load_mmlu_cot_fewshot_examples():
        # Maps dataset_name -> "description\n\nQ: ...\nA: ...\n\nQ: ..." blocks
        # assembled from the YAML files shipped next to this module.
        import yaml
        base_dir = os.path.dirname(os.path.abspath(__file__))
        fewshot_dir = os.path.join(base_dir, "flan_cot_fewshot")
        fewshot_dict = {}
        for fname in os.listdir(fewshot_dir):
            with open(os.path.join(fewshot_dir, fname)) as fp:
                cfg = yaml.safe_load(fp)
            pieces = [cfg['description'].strip()]
            for sample in cfg['fewshot_config']["samples"]:
                pieces.append("Q: " + sample['question'].strip() + "\n" + "A: " + sample['target'].strip())
            fewshot_dict[cfg['dataset_name'].strip()] = "\n\n".join(pieces)
        return fewshot_dict

    fewshot_dict = _load_mmlu_cot_fewshot_examples()
    frame = pandas.read_csv(data_file)

    prompt_list = []
    for _, row in frame.iterrows():
        item = row.to_dict()
        fewshot_prompt = fewshot_dict[item['Subject']]
        option_line = " ".join(
            "(%s) %s" % (letter, str(item[letter]).strip()) for letter in "ABCD"
        )
        final_question = (
            fewshot_prompt + "\n\n" + "Q: " + item['Question'] + "\n"
            + option_line + "\n" + "A: "
        )
        prompt_list.append("<|im_start|>user\n" + final_question + "<|im_end|>\n<|im_start|>assistant\n")
    return prompt_list
| |
|
| |
|
def preprocess_mmlu_raw_template(data_file):
    """Build few-shot MMLU prompts without any chat template.

    Few-shot exemplars are loaded from the package-local
    ``flan_cot_fewshot`` YAML files, keyed by subject.

    Args:
        data_file: Path to the MMLU CSV file with columns
            'Subject', 'Question', 'A', 'B', 'C', 'D'.

    Returns:
        list: Raw prompts, each prefixed with the subject's
        chain-of-thought few-shot exemplars.
    """
    def _load_mmlu_cot_fewshot_examples():
        # Maps dataset_name -> "description\n\nQ: ...\nA: ...\n\nQ: ..." blocks
        # assembled from the YAML files shipped next to this module.
        import yaml
        base_dir = os.path.dirname(os.path.abspath(__file__))
        fewshot_dir = os.path.join(base_dir, "flan_cot_fewshot")
        fewshot_dict = {}
        for fname in os.listdir(fewshot_dir):
            with open(os.path.join(fewshot_dir, fname)) as fp:
                cfg = yaml.safe_load(fp)
            pieces = [cfg['description'].strip()]
            for sample in cfg['fewshot_config']["samples"]:
                pieces.append("Q: " + sample['question'].strip() + "\n" + "A: " + sample['target'].strip())
            fewshot_dict[cfg['dataset_name'].strip()] = "\n\n".join(pieces)
        return fewshot_dict

    fewshot_dict = _load_mmlu_cot_fewshot_examples()
    frame = pandas.read_csv(data_file)

    prompt_list = []
    for _, row in frame.iterrows():
        item = row.to_dict()
        fewshot_prompt = fewshot_dict[item['Subject']]
        option_line = " ".join(
            "(%s) %s" % (letter, str(item[letter]).strip()) for letter in "ABCD"
        )
        prompt_list.append(
            fewshot_prompt + "\n\n" + "Q: " + item['Question'] + "\n"
            + option_line + "\n" + "A: "
        )
    return prompt_list
| |
|
| |
|
# Zero-shot multiple-choice prompt used by preprocess_mmlu_r1_raw_template.
# Placeholders {Question} and {A}-{D} are filled via str.format; the model is
# instructed to end its response with a final line "Answer: $LETTER".
QUERY_TEMPLATE_MULTICHOICE = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.

{Question}

A) {A}
B) {B}
C) {C}
D) {D}
""".strip()
| |
|
def preprocess_mmlu_r1_raw_template(data_file):
    """Build zero-shot MMLU prompts for R1-style reasoning models.

    Args:
        data_file: Path to the MMLU CSV file with columns
            'Subject', 'Question', 'A', 'B', 'C', 'D'.

    Returns:
        list: One QUERY_TEMPLATE_MULTICHOICE-formatted prompt per row.
    """
    frame = pandas.read_csv(data_file)

    prompt_list = []
    for _, row in frame.iterrows():
        record = row.to_dict()
        # Subject is read to mirror the expected CSV schema; it does not
        # appear in the zero-shot prompt itself.
        subject = record['Subject']
        prompt_list.append(
            QUERY_TEMPLATE_MULTICHOICE.format(
                Question=record['Question'],
                A=str(record['A']).strip(),
                B=str(record['B']).strip(),
                C=str(record['C']).strip(),
                D=str(record['D']).strip(),
            )
        )
    return prompt_list
| |
|
def preprocess_mmlu_r1_raw_template_wdai(data_file):
    """Build MMLU prompts that request a boxed-letter final answer.

    Args:
        data_file: Path to the MMLU CSV file with columns
            'Question', 'A', 'B', 'C', 'D'.

    Returns:
        list: Formatted multiple-choice prompts, one per CSV row.
    """
    template = "Answer the following multiple-choice question. At the end of your response, conclude with the sentence `The answer is \\boxed{{X}}.`, replacing X with the correct capital letter of your choice.\n\n{Question}\n\nAnswer Choices:\n(A) {choice1}\n(B) {choice2}\n(C) {choice3}\n(D) {choice4}"
    frame = pandas.read_csv(data_file)
    return [
        template.format(
            Question=row['Question'].strip(),
            choice1=str(row['A']).strip(),
            choice2=str(row['B']).strip(),
            choice3=str(row['C']).strip(),
            choice4=str(row['D']).strip(),
        )
        for _, row in frame.iterrows()
    ]
| |
|
| |
|
def preprocess_mmlu_pro_chatml_template(data_file, fewshot_file):
    """Preprocess MMLU-Pro dataset with 5-shot ChatML template.

    Args:
        data_file: Path to MMLU-Pro test JSON file
        fewshot_file: Path to MMLU-Pro validation JSON file (for few-shot examples)

    Returns:
        list: Formatted prompts with 5-shot examples and ChatML template
    """
    # Letters cover up to 16 answer options per question.
    OPTION_LETTERS = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]

    def _drop_na_options(samples):
        # Remove "N/A" placeholder options from every sample (mutates in place).
        for sample in samples:
            sample["options"] = [opt for opt in sample["options"] if opt != "N/A"]
        return samples

    def _group_by_category(samples):
        # Bucket few-shot samples by their 'category' field.
        grouped = {}
        for sample in samples:
            grouped.setdefault(sample['category'], []).append(sample)
        return grouped

    def _render(sample, as_test):
        # Render one sample: "Q: <question>" then "(A) opt (B) opt ..." on one
        # line; test samples end with the "A: " cue, shots append their CoT.
        text = "Q: " + sample['question'] + "\n"
        for idx, opt in enumerate(sample['options']):
            text += "(%s) %s " % (OPTION_LETTERS[idx], opt)
        text = text.strip() + "\n"
        if as_test:
            return text + "A: "
        return text + sample['cot_content'].strip()

    with open(fewshot_file, "r") as f:
        fewshot_by_category = _group_by_category(_drop_na_options(json.load(f)))
    with open(data_file, "r") as f:
        test_samples = _drop_na_options(json.load(f))

    prompt_list = []
    for test_sample in test_samples:
        category = test_sample['category']
        shots = fewshot_by_category[category]
        # The validation split is expected to hold exactly 5 shots per category.
        assert len(shots) == 5

        pieces = ["The following are multiple choice questions (with answers) about %s." % category]
        pieces.extend(_render(shot, as_test=False) for shot in shots)
        pieces.append(_render(test_sample, as_test=True))
        body = "\n\n".join(pieces)

        prompt_list.append("<|im_start|>user\n" + body + "<|im_end|>\n<|im_start|>assistant\n")

    return prompt_list
| |
|
| |
|
def preprocess_mmlu_pro_zero_shot_chatml_template(data_file, think=True):
    """Preprocess MMLU-Pro dataset with zero-shot ChatML template.

    Args:
        data_file: Path to MMLU-Pro test JSON file
        think: Whether to enable thinking mode (default: True)

    Returns:
        list: Formatted prompts with ChatML template and boxed answer instruction

    Note:
        The boxed-answer instruction previously used doubled braces
        (``\\boxed{{X}}``). Since this string is concatenated verbatim and
        never passed through ``str.format``, the literal ``{{X}}`` leaked into
        the prompt; it now correctly renders ``\\boxed{X}``.
    """
    def _preprocess(data_list):
        # Drop "N/A" placeholder options from every sample.
        output_list = []
        for item in data_list:
            item["options"] = [opt for opt in item["options"] if opt != "N/A"]
            output_list.append(item)
        return output_list

    def _format_each_sample(sample):
        # Letters cover up to 16 answer options per question.
        choices = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]

        sample_prompt = "Question:\n" + sample['question'] + "\n\nAnswer Choices:"
        for i, opt in enumerate(sample['options']):
            sample_prompt += "\n(%s) %s" % (choices[i], opt)
        # Single braces: this string is emitted verbatim (no .format call), so
        # the model must see `\boxed{X}`, not `\boxed{{X}}`.
        sample_prompt += "\n\nConclude your response with the sentence `The answer is \\boxed{X}.`, in which X is the correct capital letter of your choice."
        return sample_prompt.strip() + "\n"

    instruction = "<|im_start|>system\nYou are a helpful and harmless assistant.<|im_end|>\n"

    with open(data_file, "r") as f:
        test_list = _preprocess(json.load(f))

    prompt_list = []
    for test_sample in test_list:
        test_prompt = _format_each_sample(test_sample)
        if think:
            # Thinking mode: open a <think> block for the assistant turn.
            final_prompt = instruction + "<|im_start|>user\n" + test_prompt + "\n /think<|im_end|>\n<|im_start|>assistant\n<think>\n"
        else:
            final_prompt = instruction + "<|im_start|>user\n" + test_prompt + "\n /no_think<|im_end|>\n<|im_start|>assistant\n"
        prompt_list.append(final_prompt)

    return prompt_list
| |
|
| |
|
def preprocess_mmlu_pro_zero_shot_raw_template(data_file, think=True):
    """Preprocess MMLU-Pro dataset with zero-shot raw formatting.

    Args:
        data_file: Path to MMLU-Pro test JSON file
        think: Whether to enable thinking mode (default: True, currently unused;
            kept for signature compatibility with the ChatML variant)

    Returns:
        list: Raw prompts with boxed answer instruction

    Note:
        The boxed-answer instruction previously used doubled braces
        (``\\boxed{{X}}``). Since this string is concatenated verbatim and
        never passed through ``str.format``, the literal ``{{X}}`` leaked into
        the prompt; it now correctly renders ``\\boxed{X}``.
    """
    def _preprocess(data_list):
        # Drop "N/A" placeholder options from every sample.
        output_list = []
        for item in data_list:
            item["options"] = [opt for opt in item["options"] if opt != "N/A"]
            output_list.append(item)
        return output_list

    def _format_each_sample(sample):
        # Letters cover up to 16 answer options per question.
        choices = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"]

        sample_prompt = "Question:\n" + sample['question'] + "\n\nAnswer Choices:"
        for i, opt in enumerate(sample['options']):
            sample_prompt += "\n(%s) %s" % (choices[i], opt)
        # Single braces: this string is emitted verbatim (no .format call), so
        # the model must see `\boxed{X}`, not `\boxed{{X}}`.
        sample_prompt += "\n\nConclude your response with the sentence `The answer is \\boxed{X}.`, in which X is the correct capital letter of your choice."
        return sample_prompt.strip() + "\n"

    with open(data_file, "r") as f:
        test_list = _preprocess(json.load(f))

    return [_format_each_sample(test_sample) for test_sample in test_list]
| |
|
| |
|
def preprocess_livecodebench_chatml_template(data_file):
    """Preprocess LiveCodeBench dataset with ChatML template.

    Args:
        data_file: Path to LiveCodeBench JSON file

    Returns:
        tuple: (prompt_list, qid_list)
            - prompt_list: Formatted coding prompts with ChatML template
            - qid_list: Question IDs
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"

    # Instruction when the problem supplies no starter code.
    solve_instruction = """Write Python code to solve the problem. Please place the solution code in the following format:\n```python\n# Your solution code here\n```"""
    # Shorter instruction when a function header is already provided.
    format_instruction = """Please place the solution code in the following format:\n```python\n# Your solution code here\n```"""

    with open(data_file, "r") as f:
        problems = json.load(f)

    prompt_list = []
    qid_list = []
    for problem in problems:
        body = problem['question_content'].strip()
        starter = problem['starter_code']
        if starter != "":
            body += ("\n\nSolve the problem starting with the provided function header."
                     "\n\nFunction header:\n```\n" + starter + "\n```"
                     "\n\n" + format_instruction)
        else:
            body += "\n\n" + solve_instruction

        prompt_list.append(system_block + "<|im_start|>user\n" + body + "<|im_end|>\n<|im_start|>assistant\n<think>\n")
        qid_list.append(problem['question_id'])

    return prompt_list, qid_list
| |
|
| |
|
def preprocess_livecodebench_raw(data_file):
    """Preprocess LiveCodeBench dataset with raw formatting.

    Args:
        data_file: Path to LiveCodeBench JSON file

    Returns:
        tuple: (prompt_list, qid_list)
            - prompt_list: Raw coding prompts
            - qid_list: Question IDs
    """
    # Instruction when the problem supplies no starter code.
    solve_instruction = """Write Python code to solve the problem. Please place the solution code in the following format:\n```python\n# Your solution code here\n```"""
    # Shorter instruction when a function header is already provided.
    format_instruction = """Please place the solution code in the following format:\n```python\n# Your solution code here\n```"""

    with open(data_file, "r") as f:
        problems = json.load(f)

    prompt_list = []
    qid_list = []
    for problem in problems:
        prompt = problem['question_content'].strip()
        starter = problem['starter_code']
        if starter != "":
            prompt += ("\n\nSolve the problem starting with the provided function header."
                       "\n\nFunction header:\n```\n" + starter + "\n```"
                       "\n\n" + format_instruction)
        else:
            prompt += "\n\n" + solve_instruction

        prompt_list.append(prompt)
        qid_list.append(problem['question_id'])

    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mbpp_chatml_template(data_file):
    """Preprocess MBPP (Mostly Basic Python Problems) dataset with ChatML template.

    Args:
        data_file: Path to MBPP JSON file (a dict keyed by task ID)

    Returns:
        tuple: (prompt_list, qid_list)
            - prompt_list: Formatted code generation prompts with ChatML template
            - qid_list: Task IDs
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"

    with open(data_file, "r") as f:
        task_dict = json.load(f)

    # dict.keys() and dict.values() iterate in the same (insertion) order,
    # so qid_list[i] corresponds to prompt_list[i].
    qid_list = list(task_dict.keys())
    prompt_list = []
    for task in task_dict.values():
        # Prefer the 'text' field; fall back to 'prompt', then empty string.
        question = task.get('text', task.get('prompt', ''))
        prompt_list.append(system_block + "<|im_start|>user\n" + question + "<|im_end|>\n<|im_start|>assistant\n<think>\n")

    return prompt_list, qid_list
| |
|
| |
|
def preprocess_mmlu_stem_chatml_template(data_file):
    """Preprocess MMLU STEM subset with ChatML template.

    Args:
        data_file: Path to MMLU STEM JSON file

    Returns:
        list: Formatted prompts with ChatML template
    """
    system_block = "<|im_start|>system\nYou are a helpful and harmless assistant. You should think step-by-step.<|im_end|>\n"

    with open(data_file, "r") as f:
        records = json.load(f)

    prompt_list = []
    for record in records:
        # Question first, then one "(A) choice" line per option; 'choices'
        # may be absent, in which case only the question is rendered.
        lines = [record['question'].strip()]
        lines.extend(
            "(%s) %s" % (chr(ord('A') + idx), choice)
            for idx, choice in enumerate(record.get('choices', []))
        )
        body = "\n".join(lines) + "\n"

        prompt_list.append(system_block + "<|im_start|>user\n" + body + "<|im_end|>\n<|im_start|>assistant\n<think>\n")

    return prompt_list
| |
|
| |
|