| import json | |
| import re | |
def read_jsonl_skip_empty_response(file_path):
    """Load a JSONL file, keeping only records with a non-empty ``response``.

    Every non-blank line is parsed as one JSON document. A record is dropped
    when its ``response`` value is missing, ``None``, or a string that is
    empty or whitespace-only.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list of the parsed records that passed the filter, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    valid_items = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing empty line at end of file) hold no
            # record; passing them to json.loads would raise JSONDecodeError.
            if not line:
                continue
            item = json.loads(line)
            # A missing key and an explicit null are both treated as "no response".
            response = item.get("response")
            if response is None or response.strip() == "":
                continue
            valid_items.append(item)
    return valid_items
def extract_code(ans_str):
    """Return the contents of the last ```python fenced block in ``ans_str``.

    Only fences opened by the exact text "```python" followed by a newline
    are recognised. The returned text is everything between that newline and
    the next closing fence.

    Args:
        ans_str: Text to search.

    Returns:
        The body of the last matching block, or ``None`` when there is none.
    """
    last_block = None
    # Walk every fenced block and remember only the most recent one.
    for fence in re.finditer(r'```python\n(.*?)```', ans_str, flags=re.DOTALL):
        last_block = fence.group(1)
    return last_block
def extract_json(ans_str):
    """Return the contents of the last ```json fenced block in ``ans_str``.

    Only fences opened by the exact text "```json" followed by a newline are
    recognised. The returned text is the raw block body; it is not parsed.

    Args:
        ans_str: Text to search.

    Returns:
        The body of the last matching block, or ``None`` when there is none.
    """
    last_block = None
    # Walk every fenced block and remember only the most recent one.
    for fence in re.finditer(r'```json\n(.*?)```', ans_str, flags=re.DOTALL):
        last_block = fence.group(1)
    return last_block
# JSONL file with the model responses to analyse.
r1_response_files = "/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-Coder-32B-Instruct.jsonl"
crux_r1_response = read_jsonl_skip_empty_response(r1_response_files)

# tcb_ids of responses that contain a python block but no json block.
code_only_res = []
# tcb_ids of responses that contain both a python block and a json block.
code_and_test = []
for res in crux_r1_response:
    tcb_id = res["tcb_id"]
    response = res["response"]
    code = extract_code(response)
    testcase = extract_json(response)
    # Responses without a non-empty python block are not counted in either list.
    if code is not None and code != "":
        if testcase is None or testcase == "":
            code_only_res.append(tcb_id)
        else:
            code_and_test.append(tcb_id)

# Percentage of code-bearing responses that have no json block.
# The division is guarded: when no response contains a python block the
# denominator is zero and the unguarded expression raised ZeroDivisionError.
total_with_code = len(code_only_res) + len(code_and_test)
code_only_pct = round(len(code_only_res) / total_with_code * 100, 2) if total_with_code else 0.0
print(f"Code-Only: {len(code_only_res)} | Code&Test: {len(code_and_test)} | {code_only_pct}")
Xet Storage Details
- Size:
- 1.66 kB
- Xet hash:
- 0408e99708dae7f04b8cf1f2ff0174f489de9e4f3f7b19f5c1494798fc40f2f7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.