| import json | |
| import os | |
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        A list holding one decoded JSON value per non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale default.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank or whitespace-only lines (e.g. extra newlines at the end of
        # the file) carry no record and would make json.loads raise.
        return [json.loads(line) for line in file if line.strip()]
| import re | |
def extract_code(ans_str):
    """Return the body of the last ```python fenced block in *ans_str*.

    The opening fence must be followed directly by a newline; the returned
    text is everything between that newline and the next closing fence.

    Raises:
        IndexError: If *ans_str* contains no such fenced block.
    """
    python_fence = re.compile(r'```python\n(.*?)```', re.DOTALL)
    blocks = python_fence.findall(ans_str)
    # Negative indexing on an empty list raises IndexError, which is how a
    # missing block is reported to the caller.
    return blocks[-1]
def extract_content_code(ans_str):
    """Return the text inside the last ``<ASSISTANT>...</ASSISTANT>`` pair.

    Matching is non-greedy and spans newlines.

    Raises:
        IndexError: If *ans_str* contains no such tag pair.
    """
    found = list(
        re.finditer(r'<ASSISTANT>(.*?)</ASSISTANT>', ans_str, re.DOTALL)
    )
    # An empty list makes the [-1] lookup raise IndexError (no tag pair).
    last_match = found[-1]
    return last_match.group(1)
def extract_json(ans_str):
    """Return the raw text of the last ```json fenced block in *ans_str*.

    The text is returned undecoded; callers pass it to ``json.loads``.

    Raises:
        IndexError: If *ans_str* contains no such fenced block.
    """
    json_blocks = re.findall(r'```json\n(.*?)```', ans_str, flags=re.DOTALL)
    final_index = len(json_blocks) - 1
    # With no matches final_index is -1, and indexing the empty list raises
    # IndexError.
    return json_blocks[final_index]
def load_qwen3_result(repsonse_path):
    """Group the JSON payloads found in ``code_and_test`` fields by ``tcb_id``.

    Each record of the JSONL file is expected to hold a ``code_and_test``
    string containing a ```json fenced block. The last such block is decoded;
    records whose block is missing or undecodable are skipped.

    Args:
        repsonse_path: Path of the JSONL response file (the parameter name
            keeps its original spelling so keyword callers keep working).

    Returns:
        A dict mapping each ``tcb_id`` to its decoded payload. When the same
        ``tcb_id`` occurs more than once, later payloads are appended to the
        first one with ``+=``.

    Raises:
        KeyError: If a record with a decodable payload has no ``tcb_id``.
    """
    tests_response = {}
    for response_item in read_jsonl(repsonse_path):
        try:
            tests = json.loads(extract_json(response_item['code_and_test']))
        except (KeyError, IndexError, TypeError, ValueError):
            # KeyError: field missing; IndexError: no ```json block found;
            # TypeError: field is not a string; ValueError: invalid JSON
            # (json.JSONDecodeError is a ValueError subclass).
            # KeyboardInterrupt/SystemExit deliberately propagate.
            continue
        tcb_id = response_item['tcb_id']
        if tcb_id not in tests_response:
            tests_response[tcb_id] = tests
        else:
            tests_response[tcb_id] += tests
    return tests_response
def get_response_function(repsonse_path, model_name):
    """Group the JSON payloads found in ``response`` fields by ``tcb_id``.

    Each record of the JSONL file is expected to hold a ``response`` string
    containing a ```json fenced block. The last such block is decoded;
    records whose block is missing or undecodable are skipped.

    Args:
        repsonse_path: Path of the JSONL response file (the parameter name
            keeps its original spelling so keyword callers keep working).
        model_name: Not used by the current implementation; kept so existing
            callers keep working.

    Returns:
        A dict mapping each ``tcb_id`` to its decoded payload. When the same
        ``tcb_id`` occurs more than once, later payloads are appended to the
        first one with ``+=``. Every collected entry is returned; no
        per-problem filtering is applied.

    Raises:
        KeyError: If a record with a decodable payload has no ``tcb_id``.
    """
    tests_response = {}
    for response_item in read_jsonl(repsonse_path):
        try:
            tests = json.loads(extract_json(response_item['response']))
        except (KeyError, IndexError, TypeError, ValueError):
            # KeyError: field missing; IndexError: no ```json block found;
            # TypeError: field is not a string; ValueError: invalid JSON
            # (json.JSONDecodeError is a ValueError subclass).
            # KeyboardInterrupt/SystemExit deliberately propagate.
            continue
        tcb_id = response_item['tcb_id']
        if tcb_id not in tests_response:
            tests_response[tcb_id] = tests
        else:
            tests_response[tcb_id] += tests
    return tests_response
_DEFAULT_DATASET_PATH = "/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"


def load_data(test_inputs, dataset_path=_DEFAULT_DATASET_PATH):
    """Pair generated test cases with up to three solutions per problem.

    Args:
        test_inputs: Mapping from ``tcb_id`` to that problem's test cases.
        dataset_path: Path of the benchmark JSON file, a list of problem
            records. Defaults to the location the script originally
            hard-coded.

    Returns:
        A list of job dicts, one per (problem, solution) pair, using only the
        first three entries of each problem's ``solutions``. Problems absent
        from *test_inputs*, or whose test collection is empty, are skipped.

    Raises:
        OSError: If the dataset file cannot be opened.
        json.JSONDecodeError: If the dataset file is not valid JSON.
        KeyError: If a selected record lacks one of the fields read below.
    """
    # Use a context manager so the dataset file handle is closed promptly.
    with open(dataset_path, "r", encoding="utf-8") as dataset_file:
        ds = json.load(dataset_file)

    res = []
    for item in ds:
        problem_id = item['tcb_id']
        if problem_id not in test_inputs:
            continue
        tests = test_inputs[problem_id]
        if not tests:
            continue
        for solution in item['solutions'][:3]:
            res.append({
                "code": solution['code'],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "compileAndRunOptions": solution["compileAndRunOptions"],
                "test_cases": tests,
                "problem_id": problem_id,
            })
    return res
if __name__ == "__main__":
    # Example of chaining the loaders for a single model:
    # data = get_response_function(repsonse_path="/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-14B-Instruct.jsonl", model_name="Qwen2.5-14B-Instruct")
    # res = load_data(data)

    # Each model appears once; a repeated name would only re-read the same
    # response file.
    model_name_list = [
        "claude-sonnet-4-20250514-thinking",
        "deepseek-v3",
        "qwen-coder-plus",
        "gpt-4o",
        "Qwen2.5-7B-Instruct",
        "Qwen2.5-14B-Instruct",
        "Qwen2.5-32B-Instruct",
        "Qwen2.5-Coder-7B-Instruct",
        "Qwen2.5-Coder-14B-Instruct",
        "Qwen2.5-Coder-32B-Instruct",
        "claude-sonnet-4-20250514",
        "qwen3-235b-a22b",
    ]
    # NOTE(review): the returned mapping is discarded, so this loop only
    # exercises loading and parsing of each response file -- confirm whether
    # the result should be saved or reported.
    for model_name in model_name_list:
        get_response_function(
            repsonse_path=f"/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_{model_name}.jsonl",
            model_name=model_name,
        )
Xet Storage Details
- Size:
- 4.24 kB
- Xet hash:
- ff102ab9c08e3aa0edbf524078f66b22545bc71d7b3fcf15ba1900717aca1e2d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.