Tsukihjy's picture
download
raw
7.47 kB
import json
import random
def get_random_indices(array_length, num_indices):
    """Return distinct random indices drawn from ``range(array_length)``.

    At most ``array_length`` indices are returned: when ``num_indices`` is
    larger than that, the request is capped and every index comes back in
    random order.
    """
    # Cap the sample size so random.sample is never asked for more items
    # than the population holds.
    sample_size = array_length if num_indices > array_length else num_indices
    population = range(array_length)
    return random.sample(population, sample_size)
def find_first_non_ac(array):
    """Return the first element of ``array`` that differs from "AC".

    Falls back to "AC" when every element equals "AC" or ``array`` is empty.
    """
    return next((status for status in array if status != "AC"), "AC")
# Test-generation algorithms whose result files are evaluated.
test_als = ["lcb", "crux"]

# Models to evaluate; entries are toggled by (un)commenting them.
model_name_list = [
    "claude-sonnet-4-20250514-thinking",
    # "deepseek-v3",
    # "qwen3-nothink",
    # "claude4",
    # "gpt-4o",
    # "qwen-coder-plus",
    # "Qwen2.5-7B-Instruct",
    # "Qwen2.5-14B-Instruct",
    # "Qwen2.5-32B-Instruct",
    # "Qwen2.5-Coder-7B-Instruct",
    # "Qwen2.5-Coder-14B-Instruct",
    # "Qwen2.5-Coder-32B-Instruct",
]

# Load the benchmark; the context manager closes the file handle instead of
# leaving it to the garbage collector.
with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as _bench_file:
    ds = json.load(_bench_file)

# Map the problem name (taken from each item's first wrong code) to its tcb_id.
# If a problem name repeats, the last item wins, as in a plain assignment loop.
tcb_id_transform = {
    entry['wrong_code'][0]['problem']: entry['tcb_id'] for entry in ds
}

with open("/home/luoxianzhen/yang/data/Ours/all_wrong_code/data/all_wrong_code_subset.json", "r", encoding="utf-8") as _subset_file:
    all_wrong_code = json.load(_subset_file)

# tcb_ids of the problems listed in the subset file. A name missing from
# tcb_id_transform raises KeyError.
sub_set = [tcb_id_transform[entry['name']] for entry in all_wrong_code]
import os
# For every (model, test algorithm, round) combination: load the evaluation
# results, classify each wrong code of each problem by its first non-"AC"
# test status, average those counts per problem, and write the resulting
# percentages as a one-row Markdown table.
for model_name in model_name_list:
    for test_al in test_als:
        for times in range(7, 8):  # only round 7 is processed
            result_file = f"/home/luoxianzhen/yang/eval_wrong_code/ALLmode_results/tcb-{model_name}-{test_al}-solution-{times}-{test_al}-rank5-all.json"
            if not os.path.exists(result_file):
                print(f"{model_name}-{test_al} NOT EXSIT!")
                continue

            # Context manager closes the handle instead of leaking it.
            with open(result_file, "r", encoding="utf-8") as result_fh:
                results = json.load(result_fh)
            num_problems = len(results)

            # Only a single "rank1" bucket is produced.
            rank_key = "rank1"
            rank_result = {
                rank_key: {"AC": 0, "CE": 0, "WA": 0, "RE": 0, "TLE": 0, "MLE": 0, "EXE": 0},
            }
            success_k = {
                rank_key: {"total": 0, "hacked": 0},
            }

            for v in results.values():
                # To restrict to the subset, iterate results.items() and skip
                # keys that are not in sub_set.
                codes = v['codes']
                rank = len(codes)
                # NOTE(review): an empty 'codes' list makes max() raise
                # ValueError; confirm whether such problems can occur.
                array_length = max(len(code['status']) for code in codes)

                # Per-problem tally of the verdict reached by each wrong code.
                hacked = 0
                status_present = {
                    "AC": 0, "CE": 0, "WA": 0, "RE": 0, "TLE": 0, "MLE": 0, "EXE": 0
                }
                success_k[rank_key]["total"] += rank
                if array_length == 0:
                    # No test statuses at all: every code counts as "AC".
                    status_present['AC'] += rank
                else:
                    for code in codes:
                        # Classify once and reuse the verdict for both counters.
                        verdict = find_first_non_ac(code['status'])
                        status_present[verdict] += 1
                        if verdict != "AC":
                            hacked += 1

                # Accumulate per-problem fractions so every problem has equal
                # weight regardless of how many codes it has.
                success_k[rank_key]["hacked"] += hacked / rank
                for key, value in status_present.items():
                    rank_result[rank_key][key] += (value / rank)

            # Build the Markdown table.
            algorithm_model = f"{test_al}|{model_name}"
            markdown_table = "| Algorithm | Model | Rank | AC | CE | WA | RE | TLE | MLE | EXE | Hack Rate |\n"
            markdown_table += "|----------|--------|------|----|----|----|----|-----|-----|-----|-----------|\n"
            for rank_name in rank_result:
                total = success_k[rank_name]["total"]
                hacked_sum = success_k[rank_name]["hacked"]
                hack_rate = (hacked_sum / num_problems * 100) if total > 0 else 0
                hack_rate = round(hack_rate, 2)  # keep two decimal places
                # Percentage of each status, averaged over all problems. Guard
                # the division so an empty results file yields 0.00% instead
                # of raising ZeroDivisionError.
                status_percentages = [
                    f"{(count / num_problems * 100) if num_problems else 0.0:.2f}%"
                    for count in rank_result[rank_name].values()
                ]
                markdown_table += f"| {algorithm_model} | {rank_name} | " + " | ".join(status_percentages) + f" | {hack_rate}% |\n"

            # Save to a .md file (explicit encoding, matching the reads).
            with open(f"/home/luoxianzhen/yang/data/add_experience/rank_result-{model_name}-{test_al}-solution{times}.md", "w", encoding="utf-8") as file:
                file.write(markdown_table)
            print("Markdown 文件已生成: rank_result.md")
# subset = ['秘密袭击', '最大公约数', '种树 Growing Trees', '反质数序列', '小 Y 和恐怖的奴隶主', '混合果汁', '迷宫探险', '儒略日', '拼图', 'DFS 序 2', '转圈游戏', '城池攻占', '潜入行动', '简单算术', '基因串', '最小公倍数', '数三角形', '战争调度', '与众不同', '钓鱼', '对称轴 Axes of Symmetry', '唱、跳、rap 和篮球', '降雨量', '餐巾计划', 'LJJ 的字符串', '无源汇有上下界可行流', '最短不公共子串', '任务安排 1', '道路堵塞', '聪明的燕姿', '你的名字', '塔', '遗失的答案', '字母 Letters', '林克卡特树', '石头花园 Rock Garden', '分配问题', 'Seek the Name, Seek the Fame', '炮兵阵地', 'xor', '最大连续和', '树上询问', '转化', '小奇采药', '数列递推', 'Divide', '崂山白花蛇草水', '猜数游戏', '花匠', '小 Q 的草稿', '架设电话线', '格雷码', 'Minimax', '喷水装置', '画框', '人造情感', 'Identity Theft', 'A + B 问题', 'Sim', '回文子串', '修剪草坪', '生日礼物', '填树', '题', '不同的最小割', '勘破神机', '地铁交通', '书法家', '数列互质', 'Sumdiv', '舞会', '动态图连通性', '伪光滑数', '庆典', '货车运输', '数的划分', '老 C 的任务', '吃', 'Transport', '网络协议', '活动安排', "Bessie's Snow Cow", '旅行者', '三元组', '普通平衡树', '取石子游戏 2', '领导集团问题', '滚榜', '飞镖', '抛硬币', '逛公园', '棘手的操作', '大工程', '镜面通道', '炸弹攻击 2', '找爸爸', '线性代数', '小凸玩密室', 'Circus', 'A 的 B 次方']
# def read_pass_rate_file(file_path, key, subset):
# data = {}
# original_gen_nums = 0
# with open(file_path, 'r') as file:
# for line in file:
# item = json.loads(line)
# if item['tcb_id'] not in subset:
# continue
# original_gen_nums += item[key]
# return original_gen_nums
# import os
# original_path = "/home/luoxianzhen/yang/save_tests_claude-sonnet-4-20250514-thinking/{}/test_pass_rate.jsonl"
# test_algs = ['lcb', 'crux']
# save_dict = {}
# for test_alg in test_algs:
# save_dict[test_alg] = []
# original_gen_nums = read_pass_rate_file(original_path.format(test_alg), "gen_nums",subset)
# for times in range(1, 8):
# file_path = "/home/luoxianzhen/yang/save_tests_claude-sonnet-4-20250514-thinking-add/{}-{}-filter/test_pass_rate.jsonl"
# if not os.path.exists(file_path.format(test_alg, str(times))):
# continue
# right_nums = read_pass_rate_file(file_path.format(test_alg, str(times)), "right_nums", subset)
# save_dict[test_alg].append(round(right_nums / original_gen_nums * 100, 2))
# print(save_dict)

Xet Storage Details

Size:
7.47 kB
·
Xet hash:
a563b902128d56967fe36dfac0062f5a6a21f09977d003114a006a22542471d7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.