Tsukihjy/testcase / methods /Hardtest /check_failed.py
Tsukihjy's picture
download
raw
1.15 kB
from load_response import get_response_function
from execute_tool import function_execute_box_process
# Load the stored model responses. Each item is indexed below by 'func_list',
# a mapping that may carry 'random' and 'edge' lists of generated functions.
data = get_response_function(
    repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_{}_{}.jsonl",
    model_name="Qwen2.5-32B-Instruct",
    test_al="ht",
)

# Running tallies reported by the summary line at the end of the script.
total_gen_func = 0  # number of 'random' + 'edge' functions seen
failed_func = 0     # 'random' functions whose execution returned status 'error'
fail_ti = 0         # items containing at least one failing 'random' function
no_gen = 0          # items whose 'random' functions all failed (or that had none)

for data_item in data:
    func_groups = data_item['func_list']
    random_func_list = func_groups.get('random', [])
    edge_func_list = func_groups.get('edge', [])

    # NOTE(review): 'edge' functions contribute to the total but are never
    # executed below, so the failure counters only reflect 'random' functions.
    # Confirm this is intended.
    total_gen_func += len(random_func_list) + len(edge_func_list)

    # Execute every 'random' function and count the ones reporting an error.
    item_failures = 0
    for random_func in random_func_list:
        res = function_execute_box_process(
            random_func, funcname="gen_regular_input", time_limit=3
        )
        if res.get('status') != 'error':
            continue
        item_failures += 1
        print(res.get('details'))

    failed_func += item_failures
    if item_failures > 0:
        fail_ti += 1
    # An empty 'random' list also satisfies this check (0 == 0), so items
    # without any 'random' function are counted here as well.
    if item_failures == len(random_func_list):
        no_gen += 1

print(f"total {total_gen_func} | failed {failed_func} | contain {fail_ti} | no gen {no_gen}")

Xet Storage Details

Size:
1.15 kB
·
Xet hash:
f49f549f946c85c97a2d321e47bcd485ecf4663c5cec9cc003ced167a2c1439d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.