Tsukihjy's picture
download
raw
1.66 kB
import json
import re
def read_jsonl_skip_empty_response(file_path):
    """
    Read a JSONL file and return the items whose ``response`` is non-empty.

    Blank lines in the file are ignored. An item is skipped when its
    ``response`` field is missing, ``None``, or a string that is empty or
    contains only whitespace.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list of the parsed items that were kept, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    valid_items = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline at the end of
                # the file) are not records; json.loads("") would raise.
                continue
            item = json.loads(line)
            # Skip items whose response is missing, None, or blank.
            response = item.get("response", "")
            if response is None or response.strip() == "":
                continue
            valid_items.append(item)
    return valid_items
def extract_code(ans_str):
    """
    Return the contents of the last ```python fenced block in ``ans_str``.

    The returned text is everything between the newline that follows the
    opening ```python marker and the next closing ``` marker. Returns
    ``None`` when ``ans_str`` contains no such block.
    """
    fenced_blocks = re.findall(r'```python\n(.*?)```', ans_str, re.DOTALL)
    # When several blocks are present, only the last one is returned.
    return fenced_blocks[-1] if fenced_blocks else None
def extract_json(ans_str):
    """
    Return the contents of the last ```json fenced block in ``ans_str``.

    The returned text is everything between the newline that follows the
    opening ```json marker and the next closing ``` marker; it is returned
    as raw text and is not parsed. Returns ``None`` when ``ans_str``
    contains no such block.
    """
    fenced_blocks = re.findall(r'```json\n(.*?)```', ans_str, re.DOTALL)
    # When several blocks are present, only the last one is returned.
    return fenced_blocks[-1] if fenced_blocks else None
# Script section: count how many model responses contain only Python code
# versus Python code together with a JSON test-case block.
r1_response_files = "/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_deepseek-r1-0528-volce.jsonl"
crux_r1_response = read_jsonl_skip_empty_response(r1_response_files)

# tcb_ids of responses with a python block but no (or an empty) json block.
code_only_res = []
# tcb_ids of responses with both a python block and a non-empty json block.
code_and_test = []
for res in crux_r1_response:
    tcb_id = res["tcb_id"]
    response = res["response"]
    code = extract_code(response)
    testcase = extract_json(response)
    # Responses without a non-empty python block are not counted at all.
    # NOTE(review): the else below is assumed to pair with the inner
    # testcase check (matching the list names and the ratio printed
    # afterwards) - confirm against the original, indented file.
    if code:
        if not testcase:
            code_only_res.append(tcb_id)
        else:
            code_and_test.append(tcb_id)

# Share of code-only responses among all responses that contain code, in
# percent. Guarded so that an input with no code-bearing responses prints
# 0.0 instead of raising ZeroDivisionError.
total_with_code = len(code_only_res) + len(code_and_test)
if total_with_code:
    code_only_pct = round(len(code_only_res) / total_with_code * 100, 2)
else:
    code_only_pct = 0.0
print(f"Code-Only: {len(code_only_res)} | Code&Test: {len(code_and_test)} | {code_only_pct}")

Xet Storage Details

Size:
1.66 kB
·
Xet hash:
51ed9a218b25905e59d10f6de672eb2d35b900c8b9b1301871eed0880fcbff1e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.