Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase / testcase-data /method /ACECODER /pipeline.py

Tsukihjy

about 2 months ago

download

raw

3.1 kB

	import sys
	sys.path.append("C:\\Users\\19307\\Desktop\\TestCase-Gen\\methods\\utils")
	from response import TurboResponser, OpenResponser
	from dataset_all import get_datasets_by_name
	from is_correct import test_output_comparison
	from config import cfg
	from prompt import System_prompt, input_template_gen_test, input_template_gen_answer
	from typing import List, Optional
	import json
	from execute_tool import get_successful_tests_slow

	def write_json_to_file(data, filepath):
	    """Serialize ``data`` as JSON and write it to ``filepath``.

	    The target is opened in write mode as UTF-8, so existing content is
	    replaced. Non-ASCII characters are written verbatim rather than escaped,
	    and the output is pretty-printed with a four-space indent.
	    """
	    with open(filepath, mode='w', encoding='utf-8') as out_file:
	        json.dump(data, out_file, ensure_ascii=False, indent=4)

	import re

	def extract_code(ans_str):
	    """Return the Python source contained in a model answer.

	    The answer is searched for Markdown fences opened with ```python. When
	    several are present, the content of the last one is returned without the
	    fence markers. An answer with no such fenced block is returned unchanged
	    (the model is assumed to have replied with bare code) instead of failing
	    with an IndexError.

	    Args:
	        ans_str: Raw text of the model response.

	    Returns:
	        The extracted code as a string.
	    """
	    # Tolerate trailing spaces/tabs and a CRLF line ending after the opening fence.
	    pattern = r'```python[ \t]*\r?\n(.*?)```'
	    matches = re.findall(pattern, ans_str, re.DOTALL)
	    if not matches:
	        return ans_str
	    return matches[-1]


	def _parse_json_answer(ans):
	    """Decode the JSON payload of a model answer, tolerating sloppy escaping."""
	    # Prefer the last ```json fenced block; otherwise treat the whole answer as JSON.
	    fenced = re.findall(r'```json\n(.*?)```', ans, re.DOTALL)
	    if fenced:
	        ans = fenced[-1]
	    try:
	        # First attempt keeps the historical behaviour: every backslash is
	        # doubled so that backslashes the model left unescaped survive decoding.
	        return json.loads(ans.replace('\\', '\\\\'))
	    except json.JSONDecodeError:
	        # Doubling breaks answers that were already valid JSON (an escaped
	        # double quote ends the string early), so retry on the untouched text.
	        return json.loads(ans)


	def create_test_cases_using_gpt(
	    responser,
	    dataset_list,
	) -> dict:
	    """Ask the model to produce test cases for each raw question/solution pair.

	    For every item the generation prompt is filled with the item's solution,
	    its query and the configured number of tests, sent to ``responser`` with
	    temperature 0.7, and the JSON in the reply is decoded.

	    Args:
	        responser: Object exposing ``respond(system_prompt, message, temperature=...)``
	            and returning the reply text.
	        dataset_list: Iterable of dicts with "solution", "query" and
	            "problem_id" keys.

	    Returns:
	        A dict mapping each "problem_id" to the decoded JSON reply. The caller
	        in this file reads its "question" and "tests" entries.

	    Raises:
	        json.JSONDecodeError: If a reply cannot be decoded as JSON.
	    """
	    results = {}
	    for item in dataset_list:
	        program = item["solution"]
	        instruction = item["query"]
	        input_message = (
	            input_template_gen_test
	            .replace("{program}", program)
	            .replace("{instruction}", instruction)
	            .replace('{test_gen_nums}', str(cfg.test_gen_nums))
	        )
	        reply = responser.respond(System_prompt, input_message, temperature=0.7)
	        results[item["problem_id"]] = _parse_json_answer(reply)

	    return results




	def stage_2_test(responser, al_dataset):
	    """Keep only the generated tests that a model-written solution passes.

	    For each item the model is asked to answer the rewritten question, with the
	    first generated test inserted into the prompt. The code extracted from the
	    reply is run against all generated tests, and the tests whose status equals
	    1 are kept.

	    Args:
	        responser: Object exposing ``respond(system_prompt, message)`` and
	            returning the reply text.
	        al_dataset: Iterable of dicts with "trans_description", "gen_tests",
	            "problem_id" and "runtime_limit" keys.

	    Returns:
	        A dict mapping each "problem_id" to the list of kept tests. A problem
	        with no generated tests maps to an empty list.
	    """
	    saved_tests = {}
	    for item in al_dataset:
	        query = item["trans_description"]
	        tests = item["gen_tests"]
	        if not tests:
	            # Nothing to seed the prompt with (tests[0]) and nothing to filter,
	            # so record an empty result instead of raising IndexError.
	            saved_tests[item["problem_id"]] = []
	            continue
	        prompt = input_template_gen_answer.replace("{question_prompt}", query).replace("{assert}", tests[0])
	        res = responser.respond(System_prompt, prompt)
	        answer_code = extract_code(res)
	        tests_status = get_successful_tests_slow(
	            program=answer_code,
	            tests=tests,
	            max_execution_time=item["runtime_limit"],
	        )
	        saved_tests[item["problem_id"]] = [
	            test for test, status in zip(tests, tests_status) if status == 1
	        ]
	    return saved_tests


	def acecoder():
	    """Run the two-stage pipeline and write the surviving tests to disk.

	    Stage 1 asks the model for a rewritten question and test cases per problem.
	    Stage 2 keeps the tests that a model-written solution passes. The result,
	    keyed by problem id, is written as JSON to ``cfg.tests_path``.
	    """
	    # Config-driven alternative, kept for reference:
	    # responser_stage1 = TurboResponser(cfg.model_name_test_gen)
	    # responser_stage2 = OpenResponser(model=cfg.model_name_answer_gen)
	    # Qwen 32B was a bit too weak, so both stages use gpt-4o.
	    responser_stage1 = TurboResponser("gpt-4o")
	    responser_stage2 = TurboResponser("gpt-4o")

	    # NOTE(review): the [1:2] slice processes only the second dataset entry;
	    # this looks like a debugging leftover -- confirm before a full run.
	    al_dataset = get_datasets_by_name(cfg.dataset_name)[1:2]

	    gen_list = create_test_cases_using_gpt(responser_stage1, al_dataset)
	    for item in al_dataset:
	        generated = gen_list[item["problem_id"]]
	        item["trans_description"] = generated["question"]
	        item["gen_tests"] = generated["tests"]

	    saved_tests = stage_2_test(responser_stage2, al_dataset)

	    # Report per problem: the generated tests and how many of them were kept.
	    for item in al_dataset:
	        kept = saved_tests[item["problem_id"]]
	        print(f"{item['gen_tests']} testcase nums: {len(kept)}")

	    write_json_to_file(saved_tests, cfg.tests_path)


	if __name__ == "__main__":
	    acecoder()

Xet Storage Details

Size: 3.1 kB
Xet hash: 04294351eb3d91409bc6bd5bdd3cb230399b0b5dd74c14a4149ac700f674ca18

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.