Tsukihjy's picture
download
raw
3.41 kB
import json
import os
def remove_freopen_lines(input_str):
    """Return ``input_str`` without the lines that mention ``freopen``.

    The text is split with ``str.splitlines()``; every line containing the
    substring "freopen" anywhere is dropped, and the remaining lines are
    joined back together with a single newline character. Because of the
    split/join round trip, the original line terminators and any trailing
    newline are not preserved.
    """
    kept = []
    for row in input_str.splitlines():
        if "freopen" in row:
            # The whole line is discarded, not just the freopen call.
            continue
        kept.append(row)
    return "\n".join(kept)
def get_data(
    name="tcb",
    prefix_dir=None,
    data_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
):
    """Build one execution job per wrong-code entry in the benchmark file.

    Args:
        name: Currently unused; kept for interface compatibility.
        prefix_dir: Currently unused (only referenced by a disabled code
            path that read per-problem test files); kept for compatibility.
        data_path: JSON file holding a list of problem records. Each record
            is read for the keys ``sample``, ``wrong_code``, ``tcb_id`` and,
            when it has wrong-code entries, ``runtime_limit`` and
            ``memory_limit``.

    Returns:
        A flat list of dicts, one per entry of every problem's
        ``wrong_code`` list, with the keys ``code`` (freopen lines removed),
        ``compileAndRunOptions``, ``time_limit``, ``memory_limit``,
        ``test_cases`` (a one-element list holding the problem's ``sample``),
        ``problem_id``, ``code_id`` (index within ``wrong_code``) and
        ``rank`` (number of wrong-code entries for that problem).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(data_path, "r", encoding="utf-8") as fh:
        ds = json.load(fh)

    res = []
    for item in ds:
        # The same list object is shared by every job of this problem.
        testcases = [item['sample']]
        wrong_codes = item['wrong_code']
        total = len(wrong_codes)
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": remove_freopen_lines(c['code']),
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": total,
            })
    return res
def get_tests(
    name="tcb",
    test_path_al1="",
    algo_path="",
    data_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
):
    """List the per-problem test-file paths for every problem in the benchmark.

    Args:
        name: Currently unused; kept for interface compatibility.
        test_path_al1: Directory joined with the test file name to form
            ``crux_path``.
        algo_path: Directory joined with the test file name to form
            ``algo_path``.
        data_path: JSON file holding a list of problem records; only the
            ``tcb_id`` key of each record is read.

    Returns:
        A list with one dict per problem, with the keys ``crux_path``,
        ``algo_path`` and ``problem_id``. Both paths end in
        ``tests-<tcb_id>.jsonl``; the files are not checked for existence.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(data_path, "r", encoding="utf-8") as fh:
        ds = json.load(fh)

    res = []
    for item in ds:
        problem_id = item['tcb_id']
        filename = f"tests-{problem_id}.jsonl"
        res.append({
            "crux_path": os.path.join(test_path_al1, filename),
            "algo_path": os.path.join(algo_path, filename),
            "problem_id": problem_id,
        })
    return res
def _status_details(codes):
    """Keep only the ``status`` and ``details`` fields of each code result."""
    return [
        {"status": code_info["status"], "details": code_info["details"]}
        for code_info in codes
    ]


def save_back_results(problem_results, name="tcb", save_dir="results"):
    """Attach execution results to the dataset records and write them out.

    Args:
        problem_results: Mapping of problem id to a dict whose ``codes``
            entry is a list of dicts carrying ``status`` and ``details``.
        name: Selects the dataset. Exactly ``"codeforces"`` rewrites the
            codeforces dataset; any name containing ``"tcb"`` rewrites the
            TestcaseBench dataset. Any other name does nothing.
        save_dir: Sub-directory of the eval folder used for the tcb output
            file; it is created if it does not exist.

    Returns:
        None. The result is written to a JSON file.

    Raises:
        KeyError: In the tcb branch, if a problem id in ``problem_results``
            is not present in the dataset.
    """
    if name == "codeforces":
        src_path = "/home/i-luoxianzhen/data/TestCase-Gen/data/codeforces/codeforces-cots_38k_extracted.json"
        dst_path = "/home/i-luoxianzhen/data/TestCase-Gen/data/codeforces/codeforces-cots_38k_extracted_executed.json"
        with open(src_path, "r", encoding="utf-8") as fh:
            ds = json.load(fh)
        # NOTE(review): the dataset is indexed directly by problem_id, so the
        # keys of problem_results must match how this file is indexed
        # (mapping keys or list positions) -- confirm against the caller.
        for problem_id, v in problem_results.items():
            ds[problem_id]["res"] = _status_details(v["codes"])
        # Close the handle explicitly so the output is fully flushed.
        with open(dst_path, "w", encoding="utf-8") as fh:
            json.dump(ds, fh, indent=2, ensure_ascii=False)

    if "tcb" in name:
        with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as fh:
            ds = json.load(fh)
        # Index the records by tcb_id; the dict values are the same objects
        # as the list items, so updating them also updates ``ds``.
        ds_dict = {item['tcb_id']: item for item in ds}
        for problem_id, v in problem_results.items():
            ds_dict[problem_id]["res"] = _status_details(v["codes"])
        out_path = f"/home/luoxianzhen/yang/eval/{save_dir}/{name}-extracted_executed.json"
        # Avoid losing the results to a missing output directory.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as fh:
            json.dump(ds, fh, indent=4, ensure_ascii=False)
if __name__ == "__main__":
    # Manual entry point: build the job list for the TestcaseBench data; the
    # returned list is discarded. A previous workflow (now disabled) loaded
    # .../eval_wrong_code/results/all_results.json and passed it to
    # save_back_results(..., name="codeforces").
    get_data(
        name="tcb",
        prefix_dir="/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/crux",
    )

Xet Storage Details

Size:
3.41 kB
·
Xet hash:
066e06bc9513ae45702d9cc6a9c9be1e9e9b315e4eaac690ddb9672348e1fe53

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.