Tsukihjy's picture
download
raw
3.1 kB
import sys
sys.path.append("C:\\Users\\19307\\Desktop\\TestCase-Gen\\methods\\utils")
from response import TurboResponser, OpenResponser
from dataset_all import get_datasets_by_name
from is_correct import test_output_comparison
from config import cfg
from prompt import System_prompt, input_template_gen_test, input_template_gen_answer
from typing import List, Optional
import json
from execute_tool import get_successful_tests_slow
def write_json_to_file(data, filepath):
    """Serialize ``data`` as pretty-printed JSON into ``filepath``.

    The target is opened in write mode (existing content is replaced) with
    UTF-8 encoding. Output is indented by four spaces and non-ASCII
    characters are written as-is instead of being escaped.
    """
    dump_options = {"ensure_ascii": False, "indent": 4}
    with open(filepath, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, **dump_options)
import re
def extract_code(ans_str):
    """Return the Python source embedded in a model answer.

    Lookup order:

    1. The last block fenced as ```python (same pattern as before, so
       answers that already worked yield the same result).
    2. Otherwise, the last fenced block with any (or no) language tag.
    3. Otherwise, the whole answer with surrounding whitespace stripped.

    Steps 2 and 3 replace the ``IndexError`` that used to be raised when the
    answer contained no ```python fence.

    Args:
        ans_str: Raw text returned by the model.

    Returns:
        The extracted code as a string.
    """
    python_blocks = re.findall(r'```python\n(.*?)```', ans_str, re.DOTALL)
    if python_blocks:
        return python_blocks[-1]

    # Fallback: accept fences tagged differently (```py, ```Python, bare ```).
    generic_blocks = re.findall(r'```[^\n`]*\n(.*?)```', ans_str, re.DOTALL)
    if generic_blocks:
        return generic_blocks[-1]

    # No fence at all: treat the answer itself as the code.
    return ans_str.strip()
def create_test_cases_using_gpt(
    responser,
    dataset_list,
) -> dict:
    """Ask the model to generate test cases for each dataset item.

    For every item the prompt is built from ``input_template_gen_test`` by
    substituting the item's ``"solution"`` and ``"query"`` fields and
    ``cfg.test_gen_nums``. The answer is parsed as JSON; when it contains a
    fenced ```json block, the last such block is parsed instead of the whole
    answer.

    Args:
        responser: Object exposing ``respond(system_prompt, message,
            temperature=...)`` that returns the answer text.
        dataset_list: Iterable of mappings with the keys ``"solution"``,
            ``"query"`` and ``"problem_id"``.

    Returns:
        A dict mapping each ``problem_id`` to the decoded JSON payload.

    Raises:
        json.JSONDecodeError: If an answer cannot be parsed as JSON.
    """
    results = {}
    for item in dataset_list:
        input_message = (
            input_template_gen_test
            .replace("{program}", item["solution"])
            .replace("{instruction}", item["query"])
            .replace('{test_gen_nums}', str(cfg.test_gen_nums))
        )
        ans = responser.respond(System_prompt, input_message, temperature=0.7)

        # Only narrow the answer when a well-formed fenced block really
        # exists; the bare text "```json" appearing somewhere in the answer
        # is not enough and previously caused an IndexError.
        fenced_blocks = re.findall(r'```json\n(.*?)```', ans, re.DOTALL)
        if fenced_blocks:
            ans = fenced_blocks[-1]

        # Every backslash is doubled before parsing, so escape sequences
        # written by the model end up as literal backslash sequences in the
        # decoded strings.
        # NOTE(review): this also turns a valid \" escape into \\" and makes
        # such answers unparseable -- confirm this trade-off is intended.
        data = json.loads(ans.replace('\\', '\\\\'))
        results[item["problem_id"]] = data
    return results
def stage_2_test(responser, al_dataset):
    """Filter generated tests by running them against a model-written answer.

    For each item the model is prompted with the item's
    ``"trans_description"`` and its first generated test, the code is pulled
    out of the answer with ``extract_code``, and all of the item's tests are
    executed against that code via ``get_successful_tests_slow``. A test is
    kept when its status equals 1 (presumably "passed" -- confirm against
    ``execute_tool``).

    Args:
        responser: Object exposing ``respond(system_prompt, message)``.
        al_dataset: Iterable of mappings with the keys ``"problem_id"``,
            ``"trans_description"``, ``"gen_tests"`` and ``"runtime_limit"``.

    Returns:
        A dict mapping each ``problem_id`` to the list of kept tests. Items
        without any generated test map to an empty list.
    """
    saved_tests = {}
    for item in al_dataset:
        query = item["trans_description"]
        tests = item["gen_tests"]

        # Nothing to validate: skip the model call instead of failing on
        # tests[0] below.
        if not tests:
            saved_tests[item["problem_id"]] = []
            continue

        # The first generated test is shown to the model as an example.
        prompt = (
            input_template_gen_answer
            .replace("{question_prompt}", query)
            .replace("{assert}", tests[0])
        )
        res = responser.respond(System_prompt, prompt)
        answer_code = extract_code(res)
        tests_status = get_successful_tests_slow(
            program=answer_code,
            tests=tests,
            max_execution_time=item["runtime_limit"],
        )
        saved_tests[item["problem_id"]] = [
            test for test, status in zip(tests, tests_status) if status == 1
        ]
    return saved_tests
def acecoder():
    """Run the two-stage test-generation pipeline and save the kept tests.

    Stage 1 asks the model for a rewritten question and candidate tests for
    each selected dataset item; the results are stored on the item under
    ``"trans_description"`` and ``"gen_tests"``. Stage 2 filters those tests
    with ``stage_2_test``. The surviving tests are written as JSON to
    ``cfg.tests_path``.
    """
    # responser_stage1 = TurboResponser(cfg.model_name_test_gen)
    # responser_stage2 = OpenResponser(model=cfg.model_name_answer_gen)
    # Qwen 32B performed too poorly, so both stages are pinned to gpt-4o.
    responser_stage1 = TurboResponser("gpt-4o")
    responser_stage2 = TurboResponser("gpt-4o")

    # NOTE(review): the [1:2] slice restricts the run to the second dataset
    # entry only -- looks like a debugging leftover; confirm before a full run.
    al_dataset = get_datasets_by_name(cfg.dataset_name)[1:2]

    # Stage 1 uses its own responser (it was previously created but unused).
    gen_list = create_test_cases_using_gpt(responser_stage1, al_dataset)
    for item in al_dataset:
        generated = gen_list[item["problem_id"]]
        item["trans_description"] = generated["question"]
        item["gen_tests"] = generated["tests"]

    saved_tests = stage_2_test(responser_stage2, al_dataset)

    # Report per problem; this no longer relies on the loop variable leaking
    # out of the loop above and counts kept tests rather than problems.
    for item in al_dataset:
        kept = saved_tests[item["problem_id"]]
        print(f"{item['gen_tests']} testcase nums: {len(kept)}")

    # The helper defined in this file is write_json_to_file; the previous
    # call to write_to_json raised NameError.
    write_json_to_file(saved_tests, cfg.tests_path)


if __name__ == "__main__":
    acecoder()

Xet Storage Details

Size:
3.1 kB
·
Xet hash:
04294351eb3d91409bc6bd5bdd3cb230399b0b5dd74c14a4149ac700f674ca18

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.