Tsukihjy/testcase / methods /Hardtest /load_response.py
Tsukihjy's picture
download
raw
7.03 kB
import json
import os
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path of the JSON Lines file to read.

    Returns:
        A list holding one decoded JSON value per non-blank line, in file
        order. An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (for example a trailing empty line) carry no record
            # and would otherwise make json.loads fail.
            if not line.strip():
                continue
            records.append(json.loads(line))
    return records
import re
def extract_code(ans_str):
    """Return the contents of the last ```json fenced block in ``ans_str``.

    The fence must open with ```json followed directly by a newline; the
    captured text runs up to the next closing fence and may span lines.

    Raises:
        IndexError: If ``ans_str`` contains no such fenced block.
    """
    fenced_blocks = list(re.finditer(r'```json\n(.*?)```', ans_str, flags=re.DOTALL))
    # Indexing an empty list raises IndexError, which callers rely on to
    # detect responses without a JSON block.
    last_block = fenced_blocks[-1]
    return last_block.group(1)
def extract_content_code(ans_str):
    """Return the text inside the last <ASSISTANT>...</ASSISTANT> pair.

    The enclosed text may span multiple lines.

    Raises:
        IndexError: If ``ans_str`` contains no such tag pair.
    """
    assistant_tag = re.compile(r'<ASSISTANT>(.*?)</ASSISTANT>', re.DOTALL)
    sections = assistant_tag.findall(ans_str)
    # An empty result list makes the index below raise IndexError.
    return sections[-1]
# def load_qwen3_result(repsonse_path):
# test_func_list = read_jsonl(repsonse_path)
# for item in test_func_list:
# valid_func = json.loads(extract_code(item['response_validator']))['input_validator']
# response_dict = json.loads(extract_code(item["response_generator"]))
# prompt_tests = response_dict['directly_generated_inputs']
# random_func = response_dict['regular_input_generator']
# edge_func = response_dict['hacking_input_generator']
# item['func_list'] = {
# "prompt": [],
# "edge": [],
# "random": [],
# "validate_func": []
# }
# return test_func_list
def get_response_function(repsonse_path, model_name, test_al):
    """Collect, per problem, the generated test functions that still need to run.

    Reads the benchmark dataset, the per-model tests-count file and the model
    response file, groups the parsed generator/validator code by ``tcb_id``,
    and keeps only problems that are in the dataset and do not yet have a
    positive ``gen_nums`` count.

    Args:
        repsonse_path: Format template for the response JSONL path; it is
            filled positionally with ``test_al`` and the translated model name.
        model_name: Model identifier. It is used verbatim in the tests-count
            file name and translated through ``model_name_trans`` (when an
            alias exists) for the response file name.
        test_al: Test-algorithm tag used in both file names.

    Returns:
        A list of dicts with keys ``tcb_id``, ``func_list`` (a dict with the
        lists ``prompt``, ``edge``, ``random`` and ``validate_func``) and
        ``limit_nums`` (five times the problem's number of ``wrong_code``
        entries, minus the recorded ``gen_nums`` when one is present).

    Raises:
        OSError: If one of the input files cannot be opened.
        KeyError: If a response item has no ``tcb_id`` or a kept problem has
            no entry in the tests-count file.
    """
    model_name_trans = {
        "qwen3-nothink": "qwen3-235b-a22b",
        "claude4": "claude-sonnet-4-20250514",
    }

    with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)
    rank_dict = {item['tcb_id']: len(item['wrong_code']) for item in ds}

    # The tests-count file name uses the model name as passed in, so it must
    # be read before the name is translated below.
    with open(f"/home/luoxianzhen/yang/tests_count/update/{model_name}-{test_al}-tests-count.json", "r", encoding="utf-8") as counts_file:
        gen_nums = json.load(counts_file)

    model_name = model_name_trans.get(model_name, model_name)
    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

    test_functions = {}
    for response_item in test_func_list:
        try:
            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
            response_dict = json.loads(extract_code(response_item["response_generator"]))
            prompt_tests = response_dict['directly_generated_inputs']
            random_func = response_dict['regular_input_generator']
            edge_func = response_dict['hacking_input_generator']
        except Exception:
            # Best effort: a response without a parsable JSON block or with
            # missing fields is skipped. Catching Exception (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            continue

        functions = test_functions.setdefault(response_item['tcb_id'], {
            "prompt": [],
            "edge": [],
            "random": [],
            "validate_func": [],
        })
        functions['prompt'] += prompt_tests
        functions['random'].append(random_func)
        functions['edge'].append(edge_func)
        functions['validate_func'].append(valid_func)

    func_data = []
    for tcb_id, func_list in test_functions.items():
        if tcb_id not in rank_dict:
            continue
        count_entry = gen_nums[tcb_id]
        has_count = 'gen_nums' in count_entry
        # Problems that already have generated tests are not scheduled again.
        if has_count and count_entry['gen_nums'] > 0:
            continue
        limit_nums = rank_dict[tcb_id] * 5
        if has_count:
            limit_nums -= count_entry['gen_nums']
        func_data.append({
            "tcb_id": tcb_id,
            "func_list": func_list,
            'limit_nums': limit_nums,
        })
    return func_data
def load_data(test_inputs, *, dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
    """Pair each problem's generated test cases with its first three solutions.

    Args:
        test_inputs: Iterable of dicts, each with a ``tcb_id`` and a
            ``generate_testcases`` entry. When several entries share a
            ``tcb_id`` only the first one is used.
        dataset_path: Path of the benchmark dataset JSON file. Defaults to the
            location this module has always read from.

    Returns:
        A list with one dict per (problem, solution) pair, in dataset order,
        holding ``code``, ``time_limit``, ``memory_limit``,
        ``compileAndRunOptions``, ``test_cases`` and ``problem_id``. Problems
        without an entry in ``test_inputs`` are omitted.

    Raises:
        OSError: If the dataset file cannot be opened.
    """
    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)

    # Index the inputs once instead of rescanning them for every dataset item;
    # setdefault keeps the first entry seen for a duplicated tcb_id.
    tests_by_id = {}
    for test_list in test_inputs:
        tests_by_id.setdefault(test_list["tcb_id"], test_list)

    res = []
    for item in ds:
        if item['tcb_id'] not in tests_by_id:
            continue
        tests = tests_by_id[item['tcb_id']]['generate_testcases']
        # Only the first three solutions of each problem are used.
        for c in item['solutions'][:3]:
            res.append({
                "code": c['code'],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "test_cases": tests,
                "problem_id": item['tcb_id'],
            })
    return res
def check_none_ht(repsonse_path, model_name, test_al):
    """Group a model's responses by problem and report edge-generator failures.

    Parses every item of the response JSONL file, groups the extracted
    generator/validator code by ``tcb_id`` and prints three counters: responses
    whose JSON could not be parsed, problems whose edge generators are all
    ``None``, and the total number of ``None`` edge generators.

    Only the response file is read; the statistics do not depend on the
    benchmark dataset or the tests-count file.

    Args:
        repsonse_path: Format template for the response JSONL path; it is
            filled positionally with ``test_al`` and the translated model name.
        model_name: Model identifier, translated through ``model_name_trans``
            when an alias exists.
        test_al: Test-algorithm tag used in the response file name.

    Returns:
        A list of dicts with keys ``tcb_id`` and ``func_list`` (a dict with the
        lists ``prompt``, ``edge``, ``random`` and ``validate_func``).

    Raises:
        OSError: If the response file cannot be opened.
        KeyError: If a parsed response item has no ``tcb_id``.
    """
    model_name_trans = {
        "qwen3-nothink": "qwen3-235b-a22b",
        "claude4": "claude-sonnet-4-20250514",
    }
    model_name = model_name_trans.get(model_name, model_name)
    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

    test_functions = {}
    fail_generate_json = 0
    for response_item in test_func_list:
        try:
            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
            response_dict = json.loads(extract_code(response_item["response_generator"]))
            prompt_tests = response_dict['directly_generated_inputs']
            random_func = response_dict['regular_input_generator']
            edge_func = response_dict['hacking_input_generator']
        except Exception:
            # Count the unusable response and move on. Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            fail_generate_json += 1
            continue

        functions = test_functions.setdefault(response_item['tcb_id'], {
            "prompt": [],
            "edge": [],
            "random": [],
            "validate_func": [],
        })
        functions['prompt'] += prompt_tests
        functions['random'].append(random_func)
        functions['edge'].append(edge_func)
        functions['validate_func'].append(valid_func)

    fail_generate = 0       # problems whose edge generators are all None
    none_edge_total = 0     # None edge generators across all problems
    func_data = []
    for tcb_id, func_list in test_functions.items():
        func_data.append({
            "tcb_id": tcb_id,
            "func_list": func_list,
        })
        none_edges = sum(1 for code in func_list["edge"] if code is None)
        none_edge_total += none_edges
        if none_edges == len(func_list["edge"]):
            fail_generate += 1

    print(f"Json 加载失败:{fail_generate_json} 0 个Edge_generator {fail_generate}, Edge_generator is None: {none_edge_total}")
    return func_data
if __name__ == "__main__":
    # Script entry point: print the edge-generator failure statistics for one
    # model. The two "{}" placeholders in the path are filled by check_none_ht
    # with the test-algorithm tag and the model name, in that order.
    check_none_ht(
        repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_{}_{}.jsonl",
        model_name="Qwen2.5-Coder-32B-Instruct",
        test_al='ht',
    )

Xet Storage Details

Size:
7.03 kB
·
Xet hash:
72d81af9d51489ec696e331bf6570b45733a87706ab4ee9e52f3142a4aaaff94

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.