Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase/methods/CruxEval/load_response.py

Tsukihjy

about 2 months ago

download

raw

4.24 kB

	import json
	import os


	def read_jsonl(file_path):
	    """Load a JSON Lines file into a list of decoded records.

	    Args:
	        file_path: Path of the JSON Lines file to read.

	    Returns:
	        A list holding one decoded JSON value per non-blank line, in file
	        order. An empty file yields an empty list.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # Decode as UTF-8 explicitly so the result does not depend on the
	    # platform's default locale encoding.
	    with open(file_path, 'r', encoding='utf-8') as file:
	        # Whitespace-only lines (typically a trailing empty line) carry no
	        # record; skipping them avoids a spurious JSONDecodeError.
	        return [json.loads(line) for line in file if line.strip()]


	import re

	def extract_code(ans_str):
	pattern = r'```python\n(.*?)```'
	matches = re.findall(pattern, ans_str, re.DOTALL)
	return matches[-1]

	def extract_content_code(ans_str):
	    """Return the text of the last ``<ASSISTANT>...</ASSISTANT>`` span.

	    The match is non-greedy and may span several lines.

	    Raises:
	        IndexError: If ``ans_str`` contains no such span.
	    """
	    spans = re.compile(r'<ASSISTANT>(.*?)</ASSISTANT>', re.DOTALL).findall(ans_str)
	    final_index = len(spans) - 1
	    # With no spans this is spans[-1] on an empty list, i.e. IndexError.
	    return spans[final_index]

	def extract_json(ans_str):
	    """Return the raw text of the last ```json fenced block in ``ans_str``.

	    The text is returned undecoded; it is whatever lies between the newline
	    ending the opening ```json fence and the next ``` marker.

	    Raises:
	        IndexError: If ``ans_str`` contains no such fenced block.
	    """
	    json_fence = r'```json\n(.*?)```'
	    payloads = list(re.findall(json_fence, ans_str, flags=re.DOTALL))
	    # Plain indexing preserves the IndexError raised when nothing matched.
	    return payloads[-1]


	def load_qwen3_result(repsonse_path):
	    """Collect the decoded tests of a response file, grouped by problem id.

	    For every record, the last ```json fenced block found in its
	    ``code_and_test`` text is decoded as JSON. Records that cannot be parsed
	    are skipped.

	    Args:
	        repsonse_path: Path of the JSON Lines response file. The spelling of
	            the parameter name is kept as-is for backward compatibility.

	    Returns:
	        A dict mapping each record's ``tcb_id`` to its decoded tests. When
	        several records share an id, their payloads are combined with
	        ``+=`` in file order.

	    Raises:
	        KeyError: If a successfully parsed record has no ``tcb_id`` field.
	    """
	    tests_response = {}
	    for response_item in read_jsonl(repsonse_path):
	        try:
	            tests = json.loads(extract_json(response_item['code_and_test']))
	        except (KeyError, IndexError, TypeError, ValueError):
	            # Best-effort parsing: skip records with a missing field, no
	            # ```json block, a non-string payload, or malformed JSON
	            # (json.JSONDecodeError is a ValueError). Unlike a bare
	            # ``except:``, this lets KeyboardInterrupt/SystemExit through.
	            continue

	        tcb_id = response_item['tcb_id']
	        if tcb_id not in tests_response:
	            tests_response[tcb_id] = tests
	        else:
	            tests_response[tcb_id] += tests

	    return tests_response

	def get_response_function(repsonse_path, model_name):
	    """Collect the decoded tests of a model's response file, grouped by problem id.

	    For every record, the last ```json fenced block found in its
	    ``response`` text is decoded as JSON. Records that cannot be parsed are
	    skipped.

	    Args:
	        repsonse_path: Path of the JSON Lines response file. The spelling of
	            the parameter name is kept because callers pass it by keyword.
	        model_name: Name of the model that produced the responses. It is
	            currently unused and kept only for interface compatibility.

	    Returns:
	        A dict mapping each record's ``tcb_id`` to its decoded tests. When
	        several records share an id, their payloads are combined with
	        ``+=`` in file order.

	    Raises:
	        KeyError: If a successfully parsed record has no ``tcb_id`` field.
	    """
	    tests_response = {}
	    for response_item in read_jsonl(repsonse_path):
	        try:
	            tests = json.loads(extract_json(response_item['response']))
	        except (KeyError, IndexError, TypeError, ValueError):
	            # Best-effort parsing: skip records with a missing field, no
	            # ```json block, a non-string payload, or malformed JSON
	            # (json.JSONDecodeError is a ValueError). Unlike a bare
	            # ``except:``, this lets KeyboardInterrupt/SystemExit through.
	            continue

	        tcb_id = response_item['tcb_id']
	        if tcb_id not in tests_response:
	            tests_response[tcb_id] = tests
	        else:
	            tests_response[tcb_id] += tests

	    # NOTE: an earlier per-problem filter and statistics pass (looking for
	    # tests whose 'output' is a list while 'input' is a str) was disabled;
	    # every problem is kept, so the grouped dict is returned unfiltered.
	    return tests_response

	def load_data(test_inputs, dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
	    """Pair generated tests with the first solutions of each benchmark problem.

	    Args:
	        test_inputs: Mapping from ``tcb_id`` to the tests generated for that
	            problem. Problems that are absent or have no tests are skipped.
	        dataset_path: Path of the benchmark JSON file, which must hold a
	            list of problem records. Defaults to the original hard-coded
	            location.

	    Returns:
	        A list of job dicts with the keys ``code``, ``time_limit``,
	        ``memory_limit``, ``compileAndRunOptions``, ``test_cases`` and
	        ``problem_id``. There is one job per solution, using at most the
	        first three solutions of each problem. Jobs of the same problem
	        share the same ``test_cases`` object (it is not copied).

	    Raises:
	        OSError: If the dataset file cannot be opened.
	        json.JSONDecodeError: If the dataset file is not valid JSON.
	        KeyError: If a used record lacks one of the expected fields.
	    """
	    # Use a context manager so the dataset file handle is always closed.
	    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
	        ds = json.load(dataset_file)

	    res = []
	    for item in ds:
	        tests = test_inputs.get(item['tcb_id'])
	        if not tests:
	            # No tests were generated for this problem (missing or empty).
	            continue
	        for c in item['solutions'][0:3]:
	            res.append({
	                "code": c['code'],
	                "time_limit": item["runtime_limit"],
	                "memory_limit": item["memory_limit"],
	                "compileAndRunOptions": c["compileAndRunOptions"],
	                "test_cases": tests,
	                "problem_id": item['tcb_id'],
	            })
	    return res

	if __name__ == "__main__":
	    # Script entry point: parse the CruxEval response file of every listed
	    # model. The parsed results are not used further here.
	    # data = get_response_function(repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-14B-Instruct.jsonl", model_name="Qwen2.5-14B-Instruct")
	    # res = load_data(data)
	    # Each model is listed once; a duplicated "qwen-coder-plus" entry used to
	    # make the same response file be parsed twice.
	    model_name_list = [
	        "claude-sonnet-4-20250514-thinking",
	        "deepseek-v3",
	        "qwen-coder-plus",
	        "gpt-4o",
	        "Qwen2.5-7B-Instruct",
	        "Qwen2.5-14B-Instruct",
	        "Qwen2.5-32B-Instruct",
	        "Qwen2.5-Coder-7B-Instruct",
	        "Qwen2.5-Coder-14B-Instruct",
	        "Qwen2.5-Coder-32B-Instruct",
	        "claude-sonnet-4-20250514",
	        "qwen3-235b-a22b",
	    ]
	    for model_name in model_name_list:
	        get_response_function(repsonse_path=f"/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_{model_name}.jsonl", model_name=model_name)

Xet Storage Details

Size: 4.24 kB
Xet hash: ff102ab9c08e3aa0edbf524078f66b22545bc71d7b3fcf15ba1900717aca1e2d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.