# Source: Tsukihjy/testcase / testcase-data/Ours/get_worry_code.py
# (raw file view, 4.61 kB)
import json
import re
from collections import defaultdict
def clean_key(original_key: str) -> str:
    """Normalize a raw problem key into a bare ``tcb_id``.

    The transformations are applied in this order:

    1. Every ``/`` is replaced with a space.
    2. Everything up to and including the first ``.`` is removed
       (the key is left untouched when it contains no dot).
    3. If a closing bracket ``」`` is present, only the text after its
       first occurrence is kept.
    4. Leading and trailing whitespace is stripped.

    :param original_key: the raw key as stored in the source JSON.
    :return: the cleaned identifier.
    """
    tcb_id = original_key.replace('/', ' ')
    # Anchored at the start: drops the shortest prefix that ends in a dot.
    tcb_id = re.sub(r'^[^.]*\.', '', tcb_id)
    bracket_pos = tcb_id.find('」')
    if bracket_pos != -1:
        tcb_id = tcb_id[bracket_pos + 1:]
    return tcb_id.strip()
# def process_and_dedup_json(filepath):
# with open(filepath, 'r', encoding='utf-8') as f:
# data = json.load(f)
# deduped_data = {}
# key_count = defaultdict(int)
# repeated_keys = set()
# for key, value in data.items():
# cleaned_key = clean_key(key)
# key_count[cleaned_key] += 1
# if cleaned_key not in deduped_data:
# deduped_data[cleaned_key] = value
# else:
# repeated_keys.add(cleaned_key)
# return deduped_data, sorted(repeated_keys)
# # 示例使用
# input_path = '/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/final_v1.0.json'
# output_path = '/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/final_v2.0.json'
# deduped_json, repeated_ids = process_and_dedup_json(input_path)
# # 写入去重后的 JSON 文件
# with open(output_path, 'w', encoding='utf-8') as f:
# json.dump(deduped_json, f, ensure_ascii=False, indent=2)
# add_keys = ['wrong_code', 'timeLimit', 'memoryLimit', 'rank', 'n_rows', 'balance_var', 'balance_column_sum']
# testcase_bench = []
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench.json', 'r', encoding='utf-8') as f:
# testcase_bench = json.load(f)
# for item in testcase_bench:
# tcb_id = item['tcb_id']
# if tcb_id in deduped_json:
# for key in add_keys:
# item[key] = deduped_json[tcb_id][key]
# else:
# print(tcb_id)
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v2.json', 'w', encoding='utf-8') as f:
# json.dump(testcase_bench, f, ensure_ascii=False, indent=4)
# import json
# def remove_aa_bb_keys(input_file, output_file):
# with open(input_file, 'r', encoding='utf-8') as f:
# data = json.load(f)
# # 如果 data 是列表,处理每个元素
# # "timeLimit": 1500,
# # "memoryLimit": 512,
# if isinstance(data, list):
# for item in data:
# item.pop('timeLimit', None)
# item.pop('memoryLimit', None)
# else:
# raise ValueError("JSON 须为数组形式,每个元素是一个对象")
# # 保存结果
# print(f"共处理 {len(data)} 条记录。")
# return data
# 示例用法
# data = remove_aa_bb_keys('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v2.json', '/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v3.json')
# for item in data:
# for w_code_item in item['wrong_code']:
# w_code_item['problem'] = item['tcb_id']
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v3.json', 'w', encoding='utf-8') as f:
# json.dump(data, f, ensure_ascii=False, indent=4)
# with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/balance_v3.0.dev_with_code.json", 'r', encoding='utf-8') as f:
# worry_code = json.load(f)
# with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v4.json", 'r', encoding='utf-8') as f:
# tcb_bench = json.load(f)
# worry_code_dict = {}
# for k, v in worry_code.items():
# tcb_id = clean_key(k)
# worry_code_dict[tcb_id] = v
# for item in tcb_bench:
# if "地底蔷薇" in item['tcb_id']:
# worry_codes = worry_code_dict[""]
# else:
# worry_codes = worry_code_dict[item['tcb_id']]
# item['wrong_code'] = worry_codes
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v5.json', 'w', encoding='utf-8') as f:
# json.dump(tcb_bench, f, ensure_ascii=False, indent=6)
# Load the v5 benchmark records that the rest of this script processes.
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v5.json", 'r', encoding='utf-8') as f:
    tcb_bench = json.load(f)
def remove_freopen_lines(code_str: str) -> str:
    """Remove every line that contains the substring ``freopen``.

    The input is split with ``str.splitlines`` and the surviving lines are
    re-joined with ``'\\n'``, so line endings are normalized to ``\\n`` and a
    trailing newline, if any, is not preserved.

    :param code_str: the original source-code string.
    :return: the source code without the ``freopen`` lines.
    """
    kept_lines = [line for line in code_str.splitlines() if 'freopen' not in line]
    return '\n'.join(kept_lines)
# Strip freopen lines from every wrong-code sample, in place.
for item in tcb_bench:
    # The sample list is nested under item["wrong_code"]["wrong_code"].
    for code_item in item["wrong_code"]["wrong_code"]:
        code_item['code'] = remove_freopen_lines(code_item['code'])

# Write the cleaned records out as the v6 benchmark file.
with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v6.json', 'w', encoding='utf-8') as f:
    json.dump(tcb_bench, f, ensure_ascii=False, indent=6)

# Xet Storage Details
# Size: 4.61 kB
# Xet hash: 325948b8fb047e2614d5ec6e79663890563ad1c8167f1ad79d3481e7b38d6237
# Xet efficiently stores files, intelligently splitting them into unique
# chunks and accelerating uploads and downloads.