| import json | |
| import os | |
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of the file to read; each non-blank line must hold
            one JSON document.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the other JSON files in this module are
    # opened the same way) instead of depending on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank / whitespace-only lines (e.g. a trailing newline at EOF) are
        # skipped; json.loads would otherwise raise on them.
        return [json.loads(line) for line in file if line.strip()]
| import re | |
def extract_code(ans_str):
    """Return the contents of the last ```json fenced block in ``ans_str``.

    The returned text is everything between the ```` ```json ```` line and
    the next closing fence, newlines included.

    Raises:
        IndexError: If ``ans_str`` contains no such fenced block.
    """
    json_fence = re.compile(r'```json\n(.*?)```', flags=re.DOTALL)
    fenced_bodies = json_fence.findall(ans_str)
    # Plain indexing on purpose: an empty result surfaces as IndexError.
    return fenced_bodies[-1]
def extract_content_code(ans_str):
    """Return the text inside the last ``<ASSISTANT>…</ASSISTANT>`` pair.

    Matching is non-greedy and spans newlines.

    Raises:
        IndexError: If ``ans_str`` contains no such tag pair.
    """
    assistant_tag = re.compile(r'<ASSISTANT>(.*?)</ASSISTANT>', flags=re.DOTALL)
    tagged_bodies = assistant_tag.findall(ans_str)
    # Plain indexing on purpose: an empty result surfaces as IndexError.
    return tagged_bodies[-1]
| # def load_qwen3_result(repsonse_path): | |
| # test_func_list = read_jsonl(repsonse_path) | |
| # for item in test_func_list: | |
| # valid_func = json.loads(extract_code(item['response_validator']))['input_validator'] | |
| # response_dict = json.loads(extract_code(item["response_generator"])) | |
| # prompt_tests = response_dict['directly_generated_inputs'] | |
| # random_func = response_dict['regular_input_generator'] | |
| # edge_func = response_dict['hacking_input_generator'] | |
| # item['func_list'] = { | |
| # "prompt": [], | |
| # "edge": [], | |
| # "random": [], | |
| # "validate_func": [] | |
| # } | |
| # return test_func_list | |
def get_response_function(
    repsonse_path,
    model_name,
    test_al,
    *,
    dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
    counts_path_template="/home/luoxianzhen/yang/tests_count/update/{model_name}-{test_al}-tests-count.json",
):
    """Collect generator/validator code from model responses, grouped by problem.

    Three inputs are read: the benchmark dataset, the per-problem test counts
    for ``(model_name, test_al)``, and the response JSONL file. Each response
    whose validator and generator payloads can be extracted and parsed is
    merged into a per-``tcb_id`` entry.

    Args:
        repsonse_path: Format string with two positional ``{}`` fields; they
            are filled with ``test_al`` and the (possibly translated) model
            name, in that order.
        model_name: Short model name. It is used verbatim for the counts
            file and translated through ``model_name_trans`` (when listed
            there) for the response file.
        test_al: Identifier substituted into both the counts path and the
            response path.
        dataset_path: JSON file holding a list of problems, each with a
            ``tcb_id`` and a ``wrong_code`` collection.
        counts_path_template: ``str.format`` template for the counts file,
            with ``{model_name}`` and ``{test_al}`` fields.

    Returns:
        A list of dicts with keys ``"tcb_id"``, ``"func_list"`` and
        ``"limit_nums"``. ``func_list`` holds the merged ``"prompt"``,
        ``"edge"``, ``"random"`` and ``"validate_func"`` lists. Problems that
        are absent from the dataset, or whose recorded ``gen_nums`` is
        positive, are left out.

    Raises:
        OSError: If any of the input files cannot be opened.
        KeyError: If a response item has no ``tcb_id``, or a kept problem is
            missing from the counts file.
    """
    model_name_trans = {
        "qwen3-nothink": "qwen3-235b-a22b",
        "claude4": "claude-sonnet-4-20250514",
    }

    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)
    # tcb_id -> number of wrong_code entries recorded for that problem.
    rank_dict = {item['tcb_id']: len(item['wrong_code']) for item in ds}

    # The counts file name uses the untranslated model name, so it is
    # resolved before the translation below.
    counts_path = counts_path_template.format(model_name=model_name, test_al=test_al)
    with open(counts_path, "r", encoding="utf-8") as counts_file:
        gen_nums = json.load(counts_file)

    model_name = model_name_trans.get(model_name, model_name)
    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

    test_functions = {}
    for response_item in test_func_list:
        try:
            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
            response_dict = json.loads(extract_code(response_item["response_generator"]))
            prompt_tests = response_dict['directly_generated_inputs']
            random_func = response_dict['regular_input_generator']
            edge_func = response_dict['hacking_input_generator']
        except (KeyError, IndexError, TypeError, ValueError):
            # Best effort: skip responses with a missing field, no ```json
            # block, a non-string payload or invalid JSON (JSONDecodeError is
            # a ValueError). KeyboardInterrupt/SystemExit still propagate.
            continue

        entry = test_functions.setdefault(
            response_item['tcb_id'],
            {"prompt": [], "edge": [], "random": [], "validate_func": []},
        )
        entry['prompt'] += prompt_tests
        entry['random'].append(random_func)
        entry['edge'].append(edge_func)
        entry['validate_func'].append(valid_func)

    func_data = []
    for k, v in test_functions.items():
        if k not in rank_dict:
            continue
        # A missing 'gen_nums' field is treated as zero.
        already_generated = gen_nums[k].get('gen_nums', 0)
        # Only problems without a positive recorded count are returned.
        if already_generated > 0:
            continue
        func_data.append({
            "tcb_id": k,
            "func_list": v,
            # Budget: 5 per wrong_code entry, minus what is already recorded.
            'limit_nums': rank_dict[k] * 5 - already_generated,
        })
    return func_data
def load_data(
    test_inputs,
    *,
    dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
    max_solutions=3,
):
    """Pair benchmark solutions with the generated test cases of their problem.

    Args:
        test_inputs: Iterable of dicts, each with a ``"tcb_id"`` and a
            ``"generate_testcases"`` entry. When several dicts share a
            ``tcb_id`` the first one is used.
        dataset_path: JSON file holding a list of problems; each problem
            provides ``tcb_id``, ``runtime_limit``, ``memory_limit`` and a
            ``solutions`` list whose items have ``code`` and
            ``compileAndRunOptions``.
        max_solutions: How many leading solutions of each problem to emit
            (``None`` emits all of them).

    Returns:
        A list of dicts with keys ``"code"``, ``"time_limit"``,
        ``"memory_limit"``, ``"compileAndRunOptions"``, ``"test_cases"`` and
        ``"problem_id"``, in dataset order. Problems without a matching entry
        in ``test_inputs`` are skipped. All records of one problem share the
        same ``test_cases`` object.

    Raises:
        OSError: If the dataset file cannot be opened.
        KeyError: If a required field is missing.
    """
    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)

    # Index the inputs once instead of rescanning them for every problem.
    # setdefault keeps the first entry seen for each tcb_id.
    tests_by_id = {}
    for test_list in test_inputs:
        tests_by_id.setdefault(test_list["tcb_id"], test_list)

    res = []
    for item in ds:
        if item['tcb_id'] not in tests_by_id:
            continue
        tests = tests_by_id[item['tcb_id']]['generate_testcases']
        for c in item['solutions'][:max_solutions]:
            res.append({
                "code": c['code'],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "test_cases": tests,
                "problem_id": item['tcb_id'],
            })
    return res
def check_none_ht(repsonse_path, model_name, test_al):
    """Report how many responses lack a usable edge ("hacking") generator.

    Reads the response JSONL file, groups every parseable response by
    ``tcb_id`` and prints three counters: responses whose payload could not
    be extracted or parsed, problems whose collected edge generators are all
    ``None``, and the total number of ``None`` edge generators.

    Args:
        repsonse_path: Format string with two positional ``{}`` fields; they
            are filled with ``test_al`` and the (possibly translated) model
            name, in that order.
        model_name: Short model name, translated through
            ``model_name_trans`` when listed there.
        test_al: Identifier substituted into the response path.

    Returns:
        A list of dicts with keys ``"tcb_id"`` and ``"func_list"``, where
        ``func_list`` holds the merged ``"prompt"``, ``"edge"``, ``"random"``
        and ``"validate_func"`` lists for that problem.

    Raises:
        OSError: If the response file cannot be opened.
        KeyError: If a parseable response item has no ``tcb_id``.
    """
    model_name_trans = {
        "qwen3-nothink": "qwen3-235b-a22b",
        "claude4": "claude-sonnet-4-20250514",
    }
    # Only the response file is needed: nothing below reads the benchmark
    # dataset or the generated-test counts, so they are not loaded.
    model_name = model_name_trans.get(model_name, model_name)
    test_func_list = read_jsonl(repsonse_path.format(test_al, model_name))

    test_functions = {}
    fail_generate_json = 0
    for response_item in test_func_list:
        try:
            valid_func = json.loads(extract_code(response_item['response_validator']))['input_validator']
            response_dict = json.loads(extract_code(response_item["response_generator"]))
            prompt_tests = response_dict['directly_generated_inputs']
            random_func = response_dict['regular_input_generator']
            edge_func = response_dict['hacking_input_generator']
        except (KeyError, IndexError, TypeError, ValueError):
            # Missing field, no ```json block, non-string payload or invalid
            # JSON (JSONDecodeError is a ValueError): count it and move on.
            # KeyboardInterrupt/SystemExit still propagate.
            fail_generate_json += 1
            continue

        entry = test_functions.setdefault(
            response_item['tcb_id'],
            {"prompt": [], "edge": [], "random": [], "validate_func": []},
        )
        entry['prompt'] += prompt_tests
        entry['random'].append(random_func)
        entry['edge'].append(edge_func)
        entry['validate_func'].append(valid_func)

    fail_generate = 0        # problems whose edge generators are all None
    none_edge_total = 0      # None edge generators across all problems
    func_data = []
    for k, v in test_functions.items():
        func_data.append({
            "tcb_id": k,
            "func_list": v,
        })
        none_here = sum(code is None for code in v["edge"])
        none_edge_total += none_here
        if none_here == len(v["edge"]):
            fail_generate += 1

    # Message (Chinese), roughly: "JSON load failures: <n>; problems with 0
    # edge generators: <n>; edge generators that are None: <n>".
    print(f"Json 加载失败:{fail_generate_json} 0 个Edge_generator {fail_generate}, Edge_generator is None: {none_edge_total}")
    return func_data
if __name__ == "__main__":
    # Script entry point: run the edge-generator check for one model/setting.
    # The two {} fields of the template are filled inside check_none_ht.
    response_path_template = "/home/luoxianzhen/yang/data/response-orginal/orginal_response_{}_{}.jsonl"
    check_none_ht(response_path_template, "Qwen2.5-Coder-32B-Instruct", 'ht')
Xet Storage Details
- Size:
- 7.03 kB
- Xet hash:
- 72d81af9d51489ec696e331bf6570b45733a87706ab4ee9e52f3142a4aaaff94
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.