Tsukihjy/testcase / testcase-data /Ours /update_correct_code.py
Tsukihjy's picture
download
raw
4.82 kB
import json
import re
from collections import defaultdict
def clean_key(original_key):
    """Normalize a raw problem key into a bare problem title.

    Steps: replace '/' with spaces, drop the leading "<prefix>." segment
    (e.g. a problem id up to the first '.'), keep only the text after a
    closing '」' bracket if one is present, and strip surrounding spaces.
    """
    key = original_key.replace('/', ' ')
    # Remove everything up to and including the first '.' (no-op if absent).
    key = re.sub(r'^[^.]*\.', '', key)
    # If a '」' bracket closes a decorated prefix, keep only what follows it.
    _, bracket, tail = key.partition('」')
    if bracket:
        key = tail
    return key.strip()
import datetime
def write_log(message: str, log_file: str = "log-predo.txt") -> None:
    """
    Append a timestamped log message to a log file.

    Args:
        message: The message to log.
        log_file: Path to the log file (default ``"log-predo.txt"``).

    Returns:
        None
    """
    # One "[YYYY-MM-DD HH:MM:SS] message" line per call, appended in UTF-8.
    line = f"{datetime.datetime.now():[%Y-%m-%d %H:%M:%S]} {message}"
    with open(log_file, "a", encoding="utf-8") as fh:
        print(line, file=fh)
# Map: problem title -> list of solution-slot indices flagged for replacement
# with refreshed correct code.
# NOTE(review): this table is not referenced by the active code below — it
# looks like bookkeeping from a previous manual pass; confirm before removing.
to_update = {
    "取石子游戏": [2],
    "火星探险": [2],
    "数列区间最大值": [0, 1],
    "简单算术": [0],
    "方格取数": [0],
    "路径交点": [1],
    "JOJO": [0],
    "Fibonacci 第 n 项": [0],
    "最小瓶颈路": [1],
    "智力测验 Intelligence Test": [0],
    "Fibonacci 前 n 项和": [0],
    "炸弹攻击": [0],
    "Cowntact Tracing": [2],
    "矩阵游戏": [2],
    "密码锁": [0],
    "最小瓶颈路(加强版)": [2],
    "最小生成树": [1],
    "数列分块入门 4": [2],
    "斐波那契": [2],
    "反色游戏": [2],
    "矩阵乘法": [2],
    "混合果汁": [2],
    "骑士精神": [0],
    "旅行": [1],
    "小 Z 的房间": [2],
    "积木 Blocks": [1],
    "驾驶员 Pilots": [0],
    "Circus": [0],
    "世界树": [1],
    "Push a Box": [2],
    "取石子游戏 2": [2],
    "臭虫集成电路公司": [0],
    "PATH": [1],
    "奇怪的计算器": [1],
    "工会 Guilds": [0],
    "如果奇迹有颜色": [0],
    "棋盘": [0],
    "有意义的字符串": [1],
    "EntropyIncreaser 与金字塔": [0],
    "小 Y 和二叉树": [0],
    "简单的函数": [0],
    "Spaceship": [2],
    "一棵树": [2],
    "微信步数": [2],
    "影魔": [0],
    "火星人": [2],
    "最长树链": [0],
    "Odd": [0],
    "题": [0],
    "Sleeping in Class": [1],
    "办公楼 Offices": [0],
    "分手是祝愿": [1],
    "拉格朗日插值": [1],
    "树链剖分": [0],
    "守卫": [2],
    "吃": [0],
    "这是一道简单的数学题": [0],
    "高精度进制转换": [0, 1],
    "树上操作": [1]
}
# --- Run configuration (absolute paths from the original experiment setup) ---
log_file = "/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/log-update_correct.txt"

# Load the benchmark items and the pool of verified-correct solutions.
# `with` guarantees the handles are closed (the original leaked bare open() handles).
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v12-update.json", "r", encoding="utf-8") as fin:
    tcb_bench = json.load(fin)
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct_code-v2.json", "r", encoding="utf-8") as fin:
    correct_code_base = json.load(fin)

# Remnant of a (now commented-out) de-duplication pass; printed to keep the
# script's console output identical to the original (always an empty list here).
double_id = []
print(double_id)

# Refresh each benchmark item's `solutions` from the correct-code pool.
for item in tcb_bench:
    # The problem title stored on the first wrong-code entry is the join key.
    tcb_id = item['wrong_code'][0]['problem']
    if tcb_id not in correct_code_base:
        write_log(f"{tcb_id} no_update", log_file)
        continue
    correct_codes = correct_code_base[tcb_id]
    if len(correct_codes) >= 3:
        # Enough fresh solutions: keep exactly three.
        item['solutions'] = correct_codes[:3]
    else:
        # Fewer than three: overwrite the leading slots, appending when the
        # item currently holds fewer slots than we have codes (the original
        # indexed blindly and could raise IndexError here).
        for i, code in enumerate(correct_codes):
            if i < len(item['solutions']):
                item['solutions'][i] = code
            else:
                item['solutions'].append(code)

with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v13.json", "w", encoding="utf-8") as fout:
    json.dump(tcb_bench, fout, indent=4, ensure_ascii=False)
# json.dump(tcb_bench, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-batch8.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
# tcb_bench_v11 = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v11-temp.json", "r", encoding="utf-8"))
# tcb_bench_v12 = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v12.json", "r", encoding="gbk"))
# ans_dict = {}
# for item in tcb_bench_v11:
# ans_dict[item['tcb_id']] = item
# for item in tcb_bench_v12:
# tcb_id = item['tcb_id']
# if tcb_id not in ans_dict.keys():
# print(tcb_id)
# continue
# item['solutions'] = ans_dict[item['tcb_id']]['solutions']
# json.dump(tcb_bench_v12, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v12-update.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)

Xet Storage Details

Size:
4.82 kB
·
Xet hash:
7b42b0a1a221595fcbb8869a85003df09ce21ac74c062c6bb0f94429bdf81e9a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.