Buckets:

Tsukihjy
/

testcase

about 1 month ago

1.66 kB


	import json
	import re
	def read_jsonl_skip_empty_response(file_path):
	    """Read a JSONL file and return the items that have a non-empty response.

	    Each non-blank line of *file_path* is parsed as one JSON document. An item
	    is dropped when its "response" value is missing, ``None``, or a string
	    containing only whitespace. Blank lines in the file are ignored.

	    Args:
	        file_path: Path of the UTF-8 encoded JSONL file to read.

	    Returns:
	        A list of the parsed items that were kept, in file order.

	    Raises:
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    valid_items = []
	    with open(file_path, "r", encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            # A blank line (e.g. a trailing newline at end of file) is not a
	            # JSON document; skip it instead of letting json.loads raise.
	            if not line:
	                continue
	            item = json.loads(line)
	            # Look the value up once; a missing key is treated like "".
	            response = item.get("response", "")
	            if response is None or response.strip() == "":
	                continue
	            valid_items.append(item)

	    return valid_items
	def extract_code(ans_str):
	    """Return the contents of the last ```python fenced block in *ans_str*.

	    The opening fence must be followed directly by a newline. The text
	    between that newline and the next closing fence is returned unchanged.
	    Returns ``None`` when *ans_str* contains no such block.
	    """
	    python_fence = re.compile(r'```python\n(.*?)```', re.DOTALL)
	    fenced_bodies = python_fence.findall(ans_str)
	    # When a response holds several blocks, the final one is used.
	    return fenced_bodies[-1] if fenced_bodies else None

	def extract_json(ans_str):
	    """Return the contents of the last ```json fenced block in *ans_str*.

	    The opening fence must be followed directly by a newline. The raw text
	    between that newline and the next closing fence is returned; it is not
	    parsed as JSON. Returns ``None`` when *ans_str* contains no such block.
	    """
	    last_body = None
	    # Walk every fenced block and keep overwriting, so the final one wins.
	    for fence in re.finditer(r'```json\n(.*?)```', ans_str, re.DOTALL):
	        last_body = fence.group(1)
	    return last_body

	# JSONL file of model responses to analyse.
	r1_response_files = "/home/luoxianzhen/yang/data/response-orginal/orginal_response_crux_Qwen2.5-Coder-32B-Instruct.jsonl"

	crux_r1_response = read_jsonl_skip_empty_response(r1_response_files)

	# tcb_ids of responses with a non-empty python block but no non-empty json block.
	code_only_res = []
	# tcb_ids of responses with both a non-empty python block and a non-empty json block.
	code_and_test = []
	for res in crux_r1_response:
	    tcb_id = res["tcb_id"]
	    response = res["response"]

	    code = extract_code(response)
	    if not code:
	        # Responses without a non-empty python block are not counted at all.
	        continue

	    testcase = extract_json(response)
	    if testcase:
	        code_and_test.append(tcb_id)
	    else:
	        code_only_res.append(tcb_id)

	# Share of code-only responses among all responses that contain code.
	# Guard the division: no response may have contained a python block.
	total_with_code = len(code_only_res) + len(code_and_test)
	if total_with_code:
	    code_only_pct = round(len(code_only_res) / total_with_code * 100, 2)
	else:
	    code_only_pct = 0.0
	# Plain "|" separators: "\|" is an invalid escape sequence in a string literal.
	print(f"Code-Only: {len(code_only_res)} | Code&Test: {len(code_and_test)} | {code_only_pct}")

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.