Buckets:

Tsukihjy
/

testcase

Files

xet

Tsukihjy/testcase / testcase-data /get_rank_result_file.py

Tsukihjy

2 months ago

download

raw

2.95 kB

	import json

	import random

	def get_random_indices(array_length, num_indices):
	    """Return distinct random indices drawn from ``range(array_length)``.

	    ``num_indices`` indices are drawn; when more are requested than exist,
	    every index in ``range(array_length)`` is returned once, in random order.
	    """
	    # Never ask random.sample for more items than the population holds.
	    sample_size = min(num_indices, array_length)
	    return random.sample(range(array_length), sample_size)

	def find_first_non_ac(array):
	    """Return the first element of ``array`` that differs from ``"AC"``.

	    ``"AC"`` is returned when ``array`` is empty or holds only ``"AC"``.
	    """
	    return next((status for status in array if status != "AC"), "AC")

	# Tags this script has been run with: lcb, ht, algo, crux, predo.
	# Only the tags listed below are processed in the current run.
	test_als = ["lcb"]

	# Models whose result files are processed in the current run.
	# Other model names that have been toggled in and out of this list:
	#   deepseek-v3, qwen3-nothink, claude4, gpt-4o, qwen-coder-plus,
	#   Qwen2.5-7B-Instruct, Qwen2.5-14B-Instruct, Qwen2.5-32B-Instruct,
	#   Qwen2.5-Coder-7B-Instruct, Qwen2.5-Coder-14B-Instruct,
	#   Qwen2.5-Coder-32B-Instruct
	model_name_list = ["claude-sonnet-4-20250514-thinking"]

	# Load the benchmark file. The files are opened with `with` so the handles
	# are closed as soon as the JSON has been parsed.
	with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as bench_file:
	    ds = json.load(bench_file)

	# rank_dict:        tcb_id -> number of wrong codes recorded for that item.
	# tcb_id_transform: problem name of the item's first wrong code -> tcb_id.
	rank_dict = {}
	tcb_id_transform = {}
	for item in ds:
	    tcb_id_transform[item['wrong_code'][0]['problem']] = item['tcb_id']
	    rank_dict[item['tcb_id']] = len(item['wrong_code'])

	with open("/home/luoxianzhen/yang/data/Ours/all_wrong_code/data/all_wrong_code_subset.json", "r", encoding="utf-8") as subset_file:
	    all_wrong_code = json.load(subset_file)

	# tcb_ids of the subset entries, in file order. A KeyError here means a
	# subset entry's name has no matching problem in the benchmark file.
	sub_set = [tcb_id_transform[entry['name']] for entry in all_wrong_code]


	import os
	# For every (model, test_al) pair: load the full result file, cut each
	# selected problem's per-test results down to a random sample of tests,
	# and write the reduced results to the corresponding "rank1" file.
	subset_ids = set(sub_set)  # built once so the membership test below is O(1)

	for model_name in model_name_list:
	    for test_al in test_als:
	        result_file = f"/home/luoxianzhen/yang/eval_wrong_code/ALLmode_results/tcb-{model_name}-{test_al}-all-wrong-code-{test_al}-rank5-all.json"
	        if not os.path.exists(result_file):
	            print(f"{model_name}-{test_al} NOT EXSIT!")
	            continue
	        with open(result_file, "r", encoding="utf-8") as in_file:
	            results = json.load(in_file)

	        for tcb_id, entry in results.items():
	            if tcb_id not in subset_ids:
	                continue
	            codes = entry['codes']

	            # Sample size: the wrong-code count stored in rank_dict.
	            nums_of_tests = rank_dict[tcb_id]
	            # Indices are drawn against the longest status list; default=0
	            # keeps an entry without codes from raising in max().
	            array_length = max((len(code['status']) for code in codes), default=0)
	            tests_index = get_random_indices(array_length, nums_of_tests)
	            if not tests_index:
	                continue  # nothing sampled, leave this entry untouched

	            highest_index = max(tests_index)
	            for code in codes:
	                # A list too short for the sampled indices is kept whole.
	                if highest_index < len(code['status']):
	                    code['status'] = [code['status'][i] for i in tests_index]
	                if highest_index < len(code['details']):
	                    code['details'] = [code['details'][i] for i in tests_index]

	        # `with` guarantees the output is flushed and closed.
	        with open(f"/home/luoxianzhen/yang/eval_wrong_code/ALLmode_results/tcb-{model_name}-{test_al}-all-wrong-code-{test_al}-rank1.json", "w", encoding="utf-8") as out_file:
	            json.dump(results, out_file, indent=4, ensure_ascii=False)

Xet Storage Details

Size: 2.95 kB
Xet hash: fdd308e143c83e3a61641b3b8e78be855360e4d2ae6bb58fd45b78fec868d7ac

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.