Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase / testcase-data /eval /load_data.py

Tsukihjy

about 1 month ago

download

raw

3.41 kB

	import json
	import os

	def remove_freopen_lines(input_str):
	    """Return *input_str* with every line containing ``freopen`` dropped.

	    The text is split with ``str.splitlines`` and the surviving lines are
	    joined back with a single newline character, so the original line
	    terminators (and any trailing newline) are not preserved.
	    """
	    kept = []
	    for row in input_str.splitlines():
	        # Plain substring match: any line mentioning "freopen" is discarded.
	        if "freopen" in row:
	            continue
	        kept.append(row)
	    return "\n".join(kept)


	def get_data(
	    name="tcb",
	    prefix_dir=None,
	    *,
	    data_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
	):
	    """Build one evaluation record per wrong-code entry in the benchmark file.

	    Args:
	        name: Currently unused; kept so existing callers keep working.
	        prefix_dir: Currently unused; kept so existing callers keep working.
	        data_path: JSON file to read. It must hold a list of problem dicts
	            with the keys ``sample``, ``wrong_code``, ``runtime_limit``,
	            ``memory_limit`` and ``tcb_id``. Defaults to the path that was
	            previously hard-coded.

	    Returns:
	        A list of dicts with the keys ``code``, ``compileAndRunOptions``,
	        ``time_limit``, ``memory_limit``, ``test_cases``, ``problem_id``,
	        ``code_id`` and ``rank``.
	    """
	    # Context manager so the file handle is closed deterministically.
	    with open(data_path, "r", encoding="utf-8") as f:
	        ds = json.load(f)

	    res = []
	    for item in ds:
	        # The problem's sample is the only test case attached here; the same
	        # list object is shared by every record of this problem.
	        testcases = [item["sample"]]
	        wrong_codes = item["wrong_code"]
	        for idx, c in enumerate(wrong_codes):
	            res.append({
	                "code": remove_freopen_lines(c["code"]),
	                "compileAndRunOptions": c["compileAndRunOptions"],
	                "time_limit": item["runtime_limit"],
	                "memory_limit": item["memory_limit"],
	                "test_cases": testcases,
	                "problem_id": item["tcb_id"],
	                "code_id": idx,
	                # Number of wrong-code entries for this problem.
	                "rank": len(wrong_codes),
	            })
	    return res

	def get_tests(
	    name="tcb",
	    test_path_al1="",
	    algo_path="",
	    *,
	    data_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
	):
	    """Map every benchmark problem to its two test-case file paths.

	    Args:
	        name: Currently unused; kept so existing callers keep working.
	        test_path_al1: Directory joined with ``tests-<tcb_id>.jsonl`` to form
	            the ``crux_path`` entry.
	        algo_path: Directory joined with ``tests-<tcb_id>.jsonl`` to form the
	            ``algo_path`` entry.
	        data_path: JSON file to read. It must hold a list of problem dicts
	            that each have a ``tcb_id`` key. Defaults to the path that was
	            previously hard-coded.

	    Returns:
	        A list of dicts with the keys ``crux_path``, ``algo_path`` and
	        ``problem_id``, one per problem, in file order. The paths are only
	        constructed; their existence is not checked.
	    """
	    # Context manager so the file handle is closed deterministically.
	    with open(data_path, "r", encoding="utf-8") as f:
	        ds = json.load(f)

	    res = []
	    for item in ds:
	        file_name = f"tests-{item['tcb_id']}.jsonl"
	        res.append({
	            "crux_path": os.path.join(test_path_al1, file_name),
	            "algo_path": os.path.join(algo_path, file_name),
	            "problem_id": item["tcb_id"],
	        })
	    return res



	def save_back_results(problem_results, name="tcb", save_dir="results"):
	    """Attach execution results to the source dataset and write it to disk.

	    For each ``problem_id`` in *problem_results*, a ``"res"`` list holding the
	    ``status`` and ``details`` of every entry in ``v["codes"]`` is stored on
	    the matching dataset record, and the whole dataset is dumped as JSON.

	    Args:
	        problem_results: Mapping of problem id to a dict with a ``"codes"``
	            list; each code entry must have ``"status"`` and ``"details"``.
	        name: ``"codeforces"`` selects the Codeforces dataset; any name
	            containing ``"tcb"`` selects the TestcaseBench dataset. Any other
	            name does nothing.
	        save_dir: Sub-directory of the eval folder that receives the
	            TestcaseBench output file (only used by the ``tcb`` branch).

	    Raises:
	        KeyError: If a problem id is not present in the loaded dataset, or a
	            code entry lacks ``"status"`` / ``"details"``.
	    """

	    def summarize(entry):
	        # Keep only the two fields that are persisted for each executed code.
	        return [
	            {"status": code_info["status"], "details": code_info["details"]}
	            for code_info in entry["codes"]
	        ]

	    if name == "codeforces":
	        with open("/home/i-luoxianzhen/data/TestCase-Gen/data/codeforces/codeforces-cots_38k_extracted.json", "r", encoding="utf-8") as f:
	            ds = json.load(f)
	        for problem_id, v in problem_results.items():
	            ds[problem_id]["res"] = summarize(v)
	        # The with-block guarantees the output is flushed and closed.
	        with open("/home/i-luoxianzhen/data/TestCase-Gen/data/codeforces/codeforces-cots_38k_extracted_executed.json", "w", encoding="utf-8") as f:
	            json.dump(ds, f, indent=2, ensure_ascii=False)

	    if "tcb" in name:
	        with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as f:
	            ds = json.load(f)
	        # Index records by tcb_id. The values are the same dict objects held
	        # in ds, so updating them through the index also updates ds.
	        ds_dict = {item["tcb_id"]: item for item in ds}
	        for problem_id, v in problem_results.items():
	            ds_dict[problem_id]["res"] = summarize(v)
	        with open(f"/home/luoxianzhen/yang/eval/{save_dir}/{name}-extracted_executed.json", "w", encoding="utf-8") as f:
	            json.dump(ds, f, indent=4, ensure_ascii=False)

	if __name__ == "__main__":
	    # Script entry point: load the benchmark records once; the returned list
	    # is discarded. An earlier variant (disabled in the original source)
	    # loaded all_results.json and wrote it back with
	    # save_back_results(ds, name="codeforces").
	    get_data(
	        name="tcb",
	        prefix_dir="/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/crux",
	    )

Xet Storage Details

Size: 3.41 kB
Xet hash: 066e06bc9513ae45702d9cc6a9c9be1e9e9b315e4eaac690ddb9672348e1fe53

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.