Tsukihjy's picture
download
raw
1.66 kB
import json
import re
def read_jsonl_skip_empty_response(file_path):
    """Read a JSONL file and return the items that have a non-empty response.

    Each non-blank line is parsed as one JSON object. An item is dropped when
    its "response" field is missing, ``None``, or contains only whitespace.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list of the parsed items that were kept, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    valid_items = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents; json.loads("") would raise, so skip them.
            if not stripped:
                continue
            item = json.loads(stripped)
            # Skip items whose response is missing, None, or whitespace-only.
            response = item.get("response", "")
            if response is None or not response.strip():
                continue
            valid_items.append(item)
    return valid_items
def extract_code(ans_str):
    """Return the contents of the last ```python fenced block in ``ans_str``.

    The fence must be opened by the exact text "```python" followed by a
    newline. Returns ``None`` when no such block is present.
    """
    fenced_blocks = re.compile(r'```python\n(.*?)```', re.DOTALL).findall(ans_str)
    # Only the final block is of interest; earlier ones are ignored.
    return fenced_blocks[-1] if fenced_blocks else None
def extract_json(ans_str):
    """Return the contents of the last ```json fenced block in ``ans_str``.

    The fence must be opened by the exact text "```json" followed by a
    newline. Returns ``None`` when no such block is present.
    """
    last_payload = None
    # Walk every fenced json block and remember only the most recent one.
    for hit in re.finditer(r'```json\n(.*?)```', ans_str, re.DOTALL):
        last_payload = hit.group(1)
    return last_payload
# Script section: count how many responses contain a ```python block alone
# versus a ```python block together with a ```json block.
r1_response_files = "/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-Coder-32B-Instruct.jsonl"
crux_r1_response = read_jsonl_skip_empty_response(r1_response_files)

# tcb_ids of responses that have a python block but no (or an empty) json block.
code_only_res = []
# tcb_ids of responses that have both a python block and a non-empty json block.
code_and_test = []
for res in crux_r1_response:
    tcb_id = res["tcb_id"]
    response = res["response"]
    code = extract_code(response)
    testcase = extract_json(response)
    # Responses with no python block (None) or an empty one ("") are not counted.
    if not code:
        continue
    if testcase:
        code_and_test.append(tcb_id)
    else:
        code_only_res.append(tcb_id)

# Percentage of code-bearing responses that lack a json block. Guard the
# division: when no response contains code the denominator is zero, so
# report 0.0 instead of raising ZeroDivisionError.
total_with_code = len(code_only_res) + len(code_and_test)
code_only_pct = round(len(code_only_res) / total_with_code * 100, 2) if total_with_code else 0.0
print(f"Code-Only: {len(code_only_res)} | Code&Test: {len(code_and_test)} | {code_only_pct}")

Xet Storage Details

Size:
1.66 kB
·
Xet hash:
0408e99708dae7f04b8cf1f2ff0174f489de9e4f3f7b19f5c1494798fc40f2f7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.