Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase / methods /Hardtest /load_response.py

Tsukihjy

about 1 month ago

download

raw

7.03 kB

	import json
	import os


	def read_jsonl(file_path):
	    """Load a JSON Lines file into a list of decoded objects.

	    Args:
	        file_path: Path of the JSON Lines file to read.

	    Returns:
	        A list holding one decoded JSON value per non-blank line, in file
	        order.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # An explicit encoding keeps decoding independent of the platform
	    # locale, matching the other JSON loads in this file.
	    with open(file_path, 'r', encoding='utf-8') as file:
	        # Blank lines (for example an extra newline at end of file) hold no
	        # record and would otherwise make json.loads fail.
	        return [json.loads(line) for line in file if line.strip()]


	import re

	def extract_code(ans_str):
	    """Return the body of the last ```json fenced block in ``ans_str``.

	    Args:
	        ans_str: Text containing one or more fenced blocks opened with
	            ```json.

	    Returns:
	        The raw (unparsed) text between the opening fence line and the
	        closing fence of the last such block.

	    Raises:
	        IndexError: If ``ans_str`` contains no ```json fenced block.
	    """
	    # Tolerate trailing spaces/tabs and CRLF line endings after the fence
	    # tag; a strict "\n" would reject otherwise well-formed responses.
	    pattern = r'```json[ \t]*\r?\n(.*?)```'
	    matches = re.findall(pattern, ans_str, re.DOTALL)
	    if not matches:
	        # Same exception type as indexing an empty list, so existing
	        # handlers keep working, but with an explanatory message.
	        raise IndexError("no ```json fenced block found in response")
	    return matches[-1]

	def extract_content_code(ans_str):
	    """Return the text inside the last <ASSISTANT>...</ASSISTANT> pair.

	    Matching is non-greedy and spans newlines. An IndexError is raised
	    when ``ans_str`` contains no such pair.
	    """
	    tag_re = re.compile(r'<ASSISTANT>(.*?)</ASSISTANT>', re.DOTALL)
	    bodies = [hit.group(1) for hit in tag_re.finditer(ans_str)]
	    # Indexing the empty list is what signals "no match" to the caller.
	    return bodies[-1]

	# def load_qwen3_result(repsonse_path):
	# test_func_list = read_jsonl(repsonse_path)

	# for item in test_func_list:
	# valid_func = json.loads(extract_code(item['response_validator']))['input_validator']
	# response_dict = json.loads(extract_code(item["response_generator"]))
	# prompt_tests = response_dict['directly_generated_inputs']
	# random_func = response_dict['regular_input_generator']
	# edge_func = response_dict['hacking_input_generator']


	# item['func_list'] = {
	# "prompt": [],
	# "edge": [],
	# "random": [],
	# "validate_func": []
	# }
	# return test_func_list

	def get_response_function(repsonse_path, model_name, test_al):
	    """Group a model's parsed test generators by problem id.

	    Reads the benchmark dataset, the per-model test-count file and the
	    raw response file, then builds one entry per problem.

	    Args:
	        repsonse_path: Format string for the response JSONL path; it is
	            filled with ``test_al`` and the (possibly translated) model
	            name, in that order.
	        model_name: Model identifier. The untranslated value selects the
	            test-count file; the value mapped through ``model_name_trans``
	            (when present there) selects the response file.
	        test_al: Identifier inserted into both file names.

	    Returns:
	        A list of dicts with keys ``tcb_id``, ``func_list`` (the lists
	        ``prompt``, ``edge``, ``random`` and ``validate_func`` gathered
	        for that problem) and ``limit_nums``.

	    Raises:
	        OSError: If any of the three input files cannot be opened.
	        KeyError: If a collected problem id is in the dataset but absent
	            from the test-count file.
	    """
	    model_name_trans = {
	        "qwen3-nothink": "qwen3-235b-a22b",
	        "claude4": "claude-sonnet-4-20250514",
	    }
	    with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as dataset_file:
	        ds = json.load(dataset_file)
	    # Number of wrong-code entries per problem; it scales limit_nums below.
	    rank_dict = {item['tcb_id']: len(item['wrong_code']) for item in ds}

	    # The count file is named after the untranslated model name, so it has
	    # to be opened before model_name is remapped.
	    with open(f"/home/luoxianzhen/yang/tests_count/update/{model_name}-{test_al}-tests-count.json", "r", encoding="utf-8") as count_file:
	        gen_nums = json.load(count_file)
	    model_name = model_name_trans.get(model_name, model_name)
	    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

	    test_functions = {}
	    for response_item in test_func_list:
	        try:
	            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
	            response_dict = json.loads(extract_code(response_item["response_generator"]))
	            prompt_tests = response_dict['directly_generated_inputs']
	            random_func = response_dict['regular_input_generator']
	            edge_func = response_dict['hacking_input_generator']
	        except Exception:
	            # Best effort: a response that cannot be parsed or lacks a
	            # field is skipped. Exception (not a bare except) lets
	            # KeyboardInterrupt and SystemExit propagate.
	            continue

	        bucket = test_functions.setdefault(response_item['tcb_id'], {
	            "prompt": [],
	            "edge": [],
	            "random": [],
	            "validate_func": [],
	        })
	        bucket['prompt'] += prompt_tests
	        bucket['random'].append(random_func)
	        bucket['edge'].append(edge_func)
	        bucket['validate_func'].append(valid_func)

	    func_data = []
	    for k, v in test_functions.items():
	        if k not in rank_dict:
	            continue
	        counts = gen_nums[k]
	        has_count = 'gen_nums' in counts
	        # NOTE(review): problems with a positive recorded count are skipped
	        # entirely, so the subtraction below only ever sees counts <= 0 --
	        # confirm this is the intended quota logic.
	        if has_count and counts['gen_nums'] > 0:
	            continue
	        budget = rank_dict[k] * 5
	        func_data.append({
	            "tcb_id": k,
	            "func_list": v,
	            'limit_nums': budget - counts['gen_nums'] if has_count else budget,
	        })

	    return func_data

	def load_data(test_inputs, dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
	    """Pair generated test cases with up to three solutions per problem.

	    Args:
	        test_inputs: Iterable of dicts, each with a ``tcb_id`` and a
	            ``generate_testcases`` entry. When several entries share a
	            ``tcb_id`` the first one is used.
	        dataset_path: Path of the benchmark dataset JSON file. Defaults to
	            the location this function previously hard-coded.

	    Returns:
	        A list with one dict per (problem, solution) pair, covering the
	        first three solutions of every dataset problem that has an entry
	        in ``test_inputs``. Each dict has the keys ``code``,
	        ``time_limit``, ``memory_limit``, ``compileAndRunOptions``,
	        ``test_cases`` and ``problem_id``.

	    Raises:
	        OSError: If ``dataset_path`` cannot be opened.
	        KeyError: If an expected field is missing from an entry.
	    """
	    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
	        ds = json.load(dataset_file)

	    # Index once instead of rescanning test_inputs for every problem;
	    # setdefault keeps the first entry per id.
	    tests_by_id = {}
	    for test_list in test_inputs:
	        tests_by_id.setdefault(test_list["tcb_id"], test_list)

	    res = []
	    for item in ds:
	        if item['tcb_id'] not in tests_by_id:
	            continue
	        tests = tests_by_id[item['tcb_id']]['generate_testcases']
	        # Only the first three solutions of each problem are used.
	        for c in item['solutions'][0:3]:
	            res.append({
	                "code": c['code'],
	                "time_limit": item["runtime_limit"],
	                "memory_limit": item["memory_limit"],
	                "compileAndRunOptions": c["compileAndRunOptions"],
	                "test_cases": tests,
	                "problem_id": item['tcb_id'],
	            })
	    return res





	def check_none_ht(repsonse_path, model_name, test_al):
	    """Report how many responses lack a usable edge-case generator.

	    Parses every response in the model's response file, groups the parsed
	    generators by problem id, and prints a one-line summary of parse
	    failures and ``None`` edge generators.

	    Args:
	        repsonse_path: Format string for the response JSONL path; it is
	            filled with ``test_al`` and the (possibly translated) model
	            name, in that order.
	        model_name: Model identifier, mapped through ``model_name_trans``
	            when present there.
	        test_al: Identifier inserted into the response file name.

	    Returns:
	        A list of dicts with keys ``tcb_id`` and ``func_list`` (the lists
	        ``prompt``, ``edge``, ``random`` and ``validate_func`` gathered
	        for that problem).

	    Raises:
	        OSError: If the response file cannot be opened.
	    """
	    model_name_trans = {
	        "qwen3-nothink": "qwen3-235b-a22b",
	        "claude4": "claude-sonnet-4-20250514",
	    }
	    # The dataset and test-count files were previously loaded here without
	    # their contents ever being used, so those reads have been dropped.
	    model_name = model_name_trans.get(model_name, model_name)
	    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

	    test_functions = {}
	    fail_generate_json = 0
	    for response_item in test_func_list:
	        try:
	            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
	            response_dict = json.loads(extract_code(response_item["response_generator"]))
	            prompt_tests = response_dict['directly_generated_inputs']
	            random_func = response_dict['regular_input_generator']
	            edge_func = response_dict['hacking_input_generator']
	        except Exception:
	            # Count and skip responses that cannot be parsed or lack a
	            # field. Exception (not a bare except) lets KeyboardInterrupt
	            # and SystemExit propagate.
	            fail_generate_json += 1
	            continue

	        bucket = test_functions.setdefault(response_item['tcb_id'], {
	            "prompt": [],
	            "edge": [],
	            "random": [],
	            "validate_func": [],
	        })
	        bucket['prompt'] += prompt_tests
	        bucket['random'].append(random_func)
	        bucket['edge'].append(edge_func)
	        bucket['validate_func'].append(valid_func)

	    fail_generate = 0       # problems whose edge generators are all None
	    None_response_hack = 0  # total None edge generators over all problems

	    func_data = []
	    for k, v in test_functions.items():
	        func_data.append({
	            "tcb_id": k,
	            "func_list": v,
	        })
	        curr = sum(1 for code in v["edge"] if code is None)
	        None_response_hack += curr
	        if len(v["edge"]) == curr:
	            fail_generate += 1
	    print(f"Json 加载失败:{fail_generate_json} 0 个Edge_generator {fail_generate}, Edge_generator is None: {None_response_hack}")

	    return func_data


	if __name__ == "__main__":
	    # Script entry point: print the edge-generator summary for one
	    # model / test-identifier pair.
	    check_none_ht(
	        repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_{}_{}.jsonl",
	        model_name="Qwen2.5-Coder-32B-Instruct",
	        test_al='ht',
	    )

Xet Storage Details

Size: 7.03 kB
Xet hash: 72d81af9d51489ec696e331bf6570b45733a87706ab4ee9e52f3142a4aaaff94

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.