# Source: Tsukihjy/testcase / testcase-data / Ours / get_rest_correct_code.py
# (file-viewer page header: "Tsukihjy's picture", "download", "raw", size 13 kB)
import sys
sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils")
from dataset_all import get_datasets_by_name
import os
import uuid
import subprocess
import tempfile
import json
import re
import subprocess
import tempfile
import os
import resource
import uuid
import json
import random
from decimal import Decimal
import re
def is_decimal(s):
    """Return True when *s* is a plain fixed-point decimal literal.

    A value qualifies only if it is a string made of an optional leading
    ``-``, one or more digits, a ``.``, and one or more digits (for example
    ``"3.14"`` or ``"-0.5"``).  Integers (``"3"``), exponent notation
    (``"1e5"``), a leading ``+`` and forms such as ``".5"`` or ``"5."`` are
    rejected.

    Args:
        s: Candidate value.  Anything that is not a ``str`` yields False
            instead of raising.

    Returns:
        bool: True if *s* matches the fixed-point pattern, else False.
    """
    # Non-string input (int, float, None, bytes) can never match the text
    # pattern; answer False rather than letting re.match raise TypeError.
    if not isinstance(s, str):
        return False
    try:
        float(s)
    except ValueError:
        # Only a parse failure means "not a number"; other exceptions such
        # as KeyboardInterrupt must not be swallowed here.
        return False
    return bool(re.match(r"^-?\d+\.\d+$", s))
def remove_freopen_lines_from_string(code_string):
    """Return *code_string* without any line that mentions ``freopen``.

    The text is split with ``str.splitlines`` and the surviving lines are
    re-joined with ``"\\n"``, so the original line terminators (and any
    trailing newline) are not preserved.
    """
    kept = []
    for source_line in code_string.splitlines():
        # Drop the whole line as soon as the substring appears anywhere in it.
        if 'freopen' in source_line:
            continue
        kept.append(source_line)
    return '\n'.join(kept)
def _as_text(value):
    """Coerce a sample value (str, number or None) to text."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _normalize_output(text):
    """Strip every line, drop blank lines and join the rest with single spaces."""
    stripped = (line.strip() for line in text.splitlines())
    return " ".join(line for line in stripped if line).strip()


def run_cpp_code_linux(code, input_string, output_string, std, memory_limit, time_limit):
    """Compile a C++ program with g++, run it on one input and judge its output.

    Args:
        code: C++ source text.
        input_string: Text written to the program's stdin.
        output_string: Expected stdout.  Non-string values (for example a
            number) are converted with ``str`` before comparison.
        std: Value passed to g++ as ``-std=<std>`` (e.g. ``"c++11"``).
        memory_limit: Accepted for interface compatibility only; no memory
            limit is applied to the child process.
        time_limit: Converted to a wall-clock timeout of
            ``int(time_limit) // 1000 + 1`` seconds (presumably the limit is
            given in milliseconds -- confirm against the dataset).

    Returns:
        dict: Always contains ``"status"``, one of ``"CE"``, ``"AC"``,
        ``"WA"``, ``"TLE"``, ``"MLE"`` or ``"RE"``.  ``"CE"`` adds
        ``"details"`` (compiler stderr); ``"AC"`` adds ``"stdout"`` (the
        normalized program output); ``"TLE"``/``"MLE"``/``"RE"`` add an empty
        ``"stdout"``.

    Raises:
        OSError: If the source file cannot be written or g++ cannot be started.
    """
    res = {}
    with tempfile.TemporaryDirectory() as tmpdirname:
        unique_id = uuid.uuid4()
        cpp_file = os.path.join(tmpdirname, f"{unique_id}.cpp")
        exe_file = os.path.join(tmpdirname, f"{unique_id}.out")

        # Write the C++ source; pin UTF-8 so non-ASCII comments or literals do
        # not depend on the locale.
        with open(cpp_file, "w", encoding="utf-8") as f:
            f.write(code)

        # Compile the C++ code.
        compile_result = subprocess.run(
            ["g++", cpp_file, "-o", exe_file, f"-std={std}"],
            capture_output=True,
            text=True,
        )
        if compile_result.returncode != 0:
            res["status"] = "CE"
            res["details"] = compile_result.stderr
            return res

        time_limit_int = int(time_limit) // 1000 + 1

        # Run the binary directly (no shell) so the timeout kills the program
        # itself and the path is never re-parsed by a shell.
        try:
            result = subprocess.run(
                [exe_file],
                input=_as_text(input_string),
                text=True,
                capture_output=True,
                timeout=time_limit_int,
            )
        except subprocess.TimeoutExpired:
            return {"status": "TLE", "stdout": ""}
        except (OSError, ValueError):
            # Failure to start the binary, or output that cannot be decoded
            # as text, is reported as a runtime error.
            return {"status": "RE", "stdout": ""}
        print(result)

        # Classify abnormal termination BEFORE judging the output, so that a
        # crashing program is never accepted just because its stdout matches.
        error = ""
        if result.returncode != 0:
            # 137 is the shell convention for SIGKILL; -9 is how subprocess
            # reports the same signal when no shell is involved.
            if result.returncode in (137, -9):
                error = "MLE"
            elif result.returncode == 124:  # exit code used by timeout(1)
                error = "TLE"
            else:
                error = "RE"
        # NOTE(review): any stderr output is classified as RE, following the
        # rule written in the original code -- confirm this is desired for
        # solutions that only print debug text to stderr.
        if not error and result.stderr:
            error = "RE"
        if error:
            return {"status": error, "stdout": ""}

        expected = _normalize_output(_as_text(output_string))
        actual = _normalize_output(result.stdout)

        # A single fixed-point number on both sides is compared with an
        # absolute tolerance of 1e-6 instead of textually.
        if is_decimal(actual) and is_decimal(expected):
            if abs(float(actual) - float(expected)) > 1e-6:
                res['status'] = "WA"
                return res
            res['stdout'] = actual
            res['status'] = "AC"
            return res

        if actual != expected:
            res['status'] = "WA"
            print(f"actual_lines : {actual}")
            print(f"expected_lines : {expected}")
            return res

        res['stdout'] = actual
        res['status'] = "AC"
        return res
import json
# code_base = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/balance_v1.0.dev_with_code.json", "r", encoding="utf-8"))
# add_data = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct.json", "r", encoding="utf-8"))
# tcb_benc = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v13.json", "r", encoding="utf-8"))
# Default compile/run option set (compiler, language standard, optimisation
# level, machine flag).  Not referenced by the code visible in this file.
compileAndRunOptions = dict(
    compiler="g++",
    std="c++11",
    O="2",
    m="x32",
)
# save_dict = {}
# i = 0
# for k, v in code_base.items():
# if k in correct_code_base.keys() and len(correct_code_base[k]) >= 3:
# continue
# print(f"idx: {k} {i} Start")
# i+=1
# if len(v['sample']) >= 1:
# input_string = v['sample'][0]['inputData']
# output_string = v['sample'][0]['outputData']
# else:
# continue
# pass_code_list = []
# for item in add_data[k]:
# if item['lang'] != 'cpp':
# continue
# result = run_cpp_code_linux(re(item['code']), input_string, item['compileAndRunOptions']['std'], v['memoryLimit'], v['timeLimit'])
# if result['status'] == "AC":
# correct_code_base[k].append(item['code'])
# if len(correct_code_base[k]) >= 3:
# break
# json.dump(correct_code_base, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct_code-v3.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
# no_pass_update_list = [
# '262144 Revisited',
# 'Balance Beam',
# 'Counting Graphs',
# 'JOJO',
# 'Problem Setting',
# 'Sleeping in Class',
# 'Spaceship',
# '一棵树',
# '世界树',
# '函数调用',
# '反质数序列',
# '奥术神杖',
# '寻宝游戏',
# '寿司晚宴',
# '小 H 爱染色',
# '小 Y 的背包计数问题',
# '工会 Guilds',
# '平凡的骰子',
# '平方数',
# '序列',
# '微信步数',
# '排序大师',
# '数字计数',
# '数组游戏',
# '斐波那契',
# '时间复杂度',
# '最小瓶颈路',
# '染色',
# '树上操作',
# '棘手的操作',
# '求和',
# '特殊勾股数',
# '白兔之舞',
# '矩阵',
# '矩阵游戏',
# '简单题',
# '类欧几里得算法',
# '花神的嘲讽计划',
# '路径交点',
# '随机游走',
# '高精度进制转换'
# ]
# no_update_list = []
# for item in tcb_benc:
# if item['tcb_id'] not in no_pass_update_list:
# continue
# k = item['wrong_code'][0]['problem']
# if k not in code_base.keys():
# print(f"{k} not update")
# no_update_list.append(k)
# continue
# v= code_base[k]
# if len(code_base[k]['sample']) >= 1:
# input_string = code_base[k]['sample'][0]['inputData']
# output_string = code_base[k]['sample'][0]['outputData']
# else:
# continue
# pos = 0
# for correct_code in add_data[k]:
# if correct_code['lang'] != 'cpp':
# continue
# result = run_cpp_code_linux(correct_code['code'], input_string, correct_code['compileAndRunOptions']['std'], v['memoryLimit'], v['timeLimit'])
# if result['status'] == "AC":
# item['solutions'][pos] = correct_code['code']
# print(f"{k} update {pos}")
# pos += 1
# if pos >= 3:
# break
# json.dump(tcb_benc, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v14.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
# print(no_update_list)
# get_list = ['#42575. 「USACO 2023.2 Platinum」Problem Setting', '#2206. 「HNOI2014」世界树', '#2070. 「SDOI2016」平凡的骰子', '#40844. 「USACO 2020.12 Platinum」Spaceship', '#2125. 「HAOI2015」树上操作']
# cc_small = {}
# for k, v in add_data.items():
# if k in get_list:
# cc_small[k] = v
# json.dump(cc_small, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct_code_for_human_check", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
# code_base = {}
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/balance_v1.0.dev_with_code.json', 'r', encoding='utf-8') as f1:
# data1 = json.load(f1)
# # 读取第二个JSON文件
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/balance_v2.0_with_code.json', 'r', encoding='utf-8') as f2:
# data2 = json.load(f2)
# # 读取第三个JSON文件
# with open('/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/balance_v3.0.dev_with_code.json', 'r', encoding='utf-8') as f3:
# data3 = json.load(f3)
# # 合并数据,保留第一个文件中的键值对
# code_base = data1.copy() # 从第一个文件开始
# for data in [data2, data3]: # 对剩余的文件进行处理
# for key, value in data.items():
# if key not in code_base: # 如果该键不存在于merged_data中,则添加
# code_base[key] = value
# right_code_add = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/sample_add.json", "r", encoding="utf-8"))
# add_data = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct.json", "r", encoding="utf-8"))
# tcb_ben = json.load(open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v18.json", "r", encoding="utf-8"))
# no_update_item = {}
# for item in tcb_ben:
# if len(item['solutions']) >= 3:
# continue
# k = item['wrong_code'][0]['problem']
# v = code_base[k]
# print(k)
# if 'sample' in code_base[k].keys() and len(code_base[k]['sample']) >= 1:
# input_string = code_base[k]['sample'][0]['inputData']
# output_string = code_base[k]['sample'][0]['outputData']
# else:
# input_string = right_code_add[k]['input']
# output_string = right_code_add[k]['output']
# print(f"input_string: {input_string}")
# print(f"output_string: {output_string}")
# pos = 0
# solutions_new = []
# for idx, correct_code in enumerate(add_data[k]):
# if correct_code['lang'] != 'cpp':
# continue
# # correct_code['code'] = remove_freopen_lines_from_string(correct_code['code'])
# result = run_cpp_code_linux(correct_code['code'], input_string, output_string, correct_code['compileAndRunOptions']['std'], v['memoryLimit'], v['timeLimit'])
# if result['status'] == "AC":
# solutions_new.append(correct_code)
# print(f"{k} update {pos}")
# pos += 1
# if pos >= 8:
# break
# else:
# if idx % 10 == 0:
# print("failed")
# # break
# if len(solutions_new) <= 7:
# no_update_item[k] = len(solutions_new)
# item['solutions'] = solutions_new
# item['sample'] = {
# 'input': input_string,
# 'output':output_string
# }
# break
# json.dump(tcb_ben, open(f"/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v18.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
# # print(no_update_item)
# Rebuild the benchmark: drop the entries listed in `titles`, and for every
# remaining entry that has no solutions yet, collect up to 8 C++ candidates
# from `add_data` that are judged "AC" on the entry's sample.

# Candidate solutions, looked up by the problem name stored in each entry.
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/correct.json", "r", encoding="utf-8") as _fp:
    add_data = json.load(_fp)
# Benchmark entries to be filtered and refreshed.
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v21.json", "r", encoding="utf-8") as _fp:
    tcb_ben = json.load(_fp)

no_update_item = {}
# Entries whose 'tcb_id' appears here are removed from the output dataset.
titles = [
    "王室联邦",
    "世界树",
    "和谐矩阵",
    "工会 Guilds",
    "程序设计竞赛 Programming Contest",
    "木棍 Sticks",
    "奥术神杖",
    "RESTORAN"
]
new_dataset = []
for item in tcb_ben:
    if item['tcb_id'] in titles:
        continue
    new_dataset.append(item)
    # Entries that already carry solutions are kept unchanged.
    if len(item['solutions']) > 0:
        continue
    k = item['wrong_code'][0]['problem']
    print(k)
    input_string = item['sample']['input']
    output_string = item['sample']['output']
    if k not in add_data:
        # A problem without candidates must not abort the whole run before
        # the output file is written; it simply ends up with no solutions.
        print(f"{k} has no candidate solutions")
    pos = 0
    solutions_new = []
    for idx, correct_code in enumerate(add_data.get(k, [])):
        if correct_code['lang'] != 'cpp':
            continue
        result = run_cpp_code_linux(
            correct_code['code'],
            input_string,
            output_string,
            correct_code['compileAndRunOptions']['std'],
            item['memory_limit'],
            item['runtime_limit'],
        )
        if result['status'] == "AC":
            solutions_new.append(correct_code)
            print(f"{k} update {pos}")
            pos += 1
            # Eight accepted solutions per problem are enough.
            if pos >= 8:
                break
        else:
            # Throttle failure logging to candidates at every tenth index.
            if idx % 10 == 0:
                print("failed")
    item['solutions'] = solutions_new

# Write the result through a context manager so the file is flushed and
# closed deterministically.
with open("/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/TestcaseBench-v22.json", "w", encoding="utf-8") as _fp:
    json.dump(new_dataset, _fp, indent=4, ensure_ascii=False)

# Xet Storage Details
# Size: 13 kB · Xet hash: a3b91c8dce2e0fa4ae51f0881eece428c347e7e380d7e601eb36e85fdc4ed9e7
# Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.