Tsukihjy/testcase / methods /CruxEval /load_response.py
Tsukihjy's picture
download
raw
4.24 kB
import json
import os
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a text file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Decode explicitly as UTF-8 so results do not depend on the platform's
    # default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank or whitespace-only lines (e.g. a trailing newline at the
            # end of the file) carry no record; json.loads would reject them.
            if not line.strip():
                continue
            data.append(json.loads(line))
    return data
import re
def extract_code(ans_str):
    """Return the contents of the last ```python fenced block in ``ans_str``.

    The returned text is everything between the newline that follows the
    opening fence and the closing fence (newlines inside are kept).

    Raises:
        IndexError: If ``ans_str`` contains no such fenced block.
    """
    fence_re = re.compile(r'```python\n(.*?)```', re.DOTALL)
    python_blocks = fence_re.findall(ans_str)
    # Only the final block is of interest; indexing an empty list raises.
    return python_blocks[-1]
def extract_content_code(ans_str):
    """Return the text inside the last ``<ASSISTANT>...</ASSISTANT>`` pair.

    The match is non-greedy and spans newlines.

    Raises:
        IndexError: If ``ans_str`` contains no such tag pair.
    """
    tagged_spans = [
        found.group(1)
        for found in re.finditer(r'<ASSISTANT>(.*?)</ASSISTANT>', ans_str, re.DOTALL)
    ]
    # Only the final span is returned; indexing an empty list raises.
    return tagged_spans[-1]
def extract_json(ans_str):
    """Return the raw text of the last ```json fenced block in ``ans_str``.

    The text is returned unparsed; callers pass it to ``json.loads``.

    Raises:
        IndexError: If ``ans_str`` contains no such fenced block.
    """
    json_fence = r'```json\n(.*?)```'
    candidates = re.findall(json_fence, ans_str, flags=re.DOTALL)
    last_index = len(candidates) - 1
    # For an empty list this is index -1, which raises IndexError.
    return candidates[last_index]
def load_qwen3_result(repsonse_path):
    """Group parsed test payloads by ``tcb_id`` from a JSONL response file.

    For every record, the last ```json fenced block inside its
    ``code_and_test`` field is parsed with ``json.loads``. Records whose
    field is missing, contains no fenced block, or holds invalid JSON are
    skipped.

    Args:
        repsonse_path: Path to the JSONL response file. (The spelling of the
            parameter name is kept for backward compatibility.)

    Returns:
        A dict mapping each ``tcb_id`` to its parsed payload. When several
        records share a ``tcb_id`` their payloads are combined with ``+=``.
    """
    tests_response = {}
    for response_item in read_jsonl(repsonse_path):
        try:
            tests = json.loads(extract_json(response_item['code_and_test']))
        except Exception:
            # Best-effort parsing: a malformed record is dropped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            continue
        tcb_id = response_item['tcb_id']
        if tcb_id in tests_response:
            tests_response[tcb_id] += tests
        else:
            tests_response[tcb_id] = tests
    return tests_response
def get_response_function(repsonse_path, model_name):
    """Group parsed test payloads by ``tcb_id`` from a model's response file.

    For every record, the last ```json fenced block inside its ``response``
    field is parsed with ``json.loads``. Records whose field is missing,
    contains no fenced block, or holds invalid JSON are skipped.

    Args:
        repsonse_path: Path to the JSONL response file. (The spelling of the
            parameter name is kept for backward compatibility.)
        model_name: Currently unused; kept so existing callers keep working.

    Returns:
        A dict mapping each ``tcb_id`` to its parsed payload. When several
        records share a ``tcb_id`` their payloads are combined with ``+=``.
    """
    tests_response = {}
    for response_item in read_jsonl(repsonse_path):
        try:
            tests = json.loads(extract_json(response_item['response']))
        except Exception:
            # Best-effort parsing: a malformed record is dropped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            continue
        tcb_id = response_item['tcb_id']
        if tcb_id in tests_response:
            tests_response[tcb_id] += tests
        else:
            tests_response[tcb_id] = tests
    # NOTE: earlier revisions carried disabled (commented-out) code here that
    # skipped already-processed ids, counted payloads whose "output" was a
    # list while "input" was a str, and filtered on that condition. None of it
    # was active, so every successfully parsed entry is returned.
    return tests_response
_DEFAULT_DATASET_PATH = "/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"


def load_data(test_inputs, dataset_path=_DEFAULT_DATASET_PATH):
    """Pair generated tests with up to three solutions per dataset problem.

    Args:
        test_inputs: Mapping from ``tcb_id`` to that problem's test cases.
            Problems absent from the mapping, or mapped to an empty
            collection, are skipped.
        dataset_path: Path to the benchmark JSON file (a list of problem
            records). Defaults to the path this module has always used.

    Returns:
        A list of dicts, one per (problem, solution) pair, with the keys
        ``code``, ``time_limit``, ``memory_limit``, ``compileAndRunOptions``,
        ``test_cases`` and ``problem_id``. Only the first three entries of
        each problem's ``solutions`` are used.
    """
    # Use a context manager so the dataset file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)
    res = []
    for item in ds:
        if item['tcb_id'] not in test_inputs:
            continue
        tests = test_inputs[item['tcb_id']]
        if len(tests) <= 0:
            continue
        for c in item['solutions'][0:3]:
            res.append({
                "code": c['code'],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "test_cases": tests,
                "problem_id": item['tcb_id'],
            })
    return res
if __name__ == "__main__":
    # Script entry point: parse the stored response file of every listed
    # model. The parsed result is currently discarded.
    #
    # Example of feeding one model's parsed tests into load_data:
    # data = get_response_function(repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-14B-Instruct.jsonl", model_name="Qwen2.5-14B-Instruct")
    # res = load_data(data)
    model_name_list = [
        "claude-sonnet-4-20250514-thinking",
        "deepseek-v3",
        # "qwen-coder-plus" used to appear twice, which parsed the same
        # response file twice; it is listed once now.
        "qwen-coder-plus",
        "gpt-4o",
        "Qwen2.5-7B-Instruct",
        "Qwen2.5-14B-Instruct",
        "Qwen2.5-32B-Instruct",
        "Qwen2.5-Coder-7B-Instruct",
        "Qwen2.5-Coder-14B-Instruct",
        "Qwen2.5-Coder-32B-Instruct",
        "claude-sonnet-4-20250514",
        "qwen3-235b-a22b",
    ]
    for model_name in model_name_list:
        get_response_function(
            repsonse_path=f"/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_{model_name}.jsonl",
            model_name=model_name,
        )

Xet Storage Details

Size:
4.24 kB
·
Xet hash:
ff102ab9c08e3aa0edbf524078f66b22545bc71d7b3fcf15ba1900717aca1e2d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.