hjkim00's picture
Restore all essential files - code, configs, and MBPP/HumanEval data
24c2665 verified
def check_id(data, n, task_id):
    """Verify that the record at position ``n`` carries the expected task id.

    Used as a guard before index-based edits so that a record is only
    touched when it is the one the caller believes it to be.

    Args:
        data: Indexable collection of mappings that each have a ``"task_id"`` key.
        n: Position in ``data`` to check.
        task_id: The task id expected at that position.

    Raises:
        AssertionError: If ``data[n]["task_id"]`` differs from ``task_id``.
    """
    actual = data[n]["task_id"]
    if actual != task_id:
        # Raised explicitly instead of via ``assert`` so the guard is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            f"expected task_id {task_id!r} at index {n}, found {actual!r}"
        )
def fix(data):
    """Patch the task list in place and return it.

    Two passes are made over ``data`` (a list of task mappings with
    ``"task_id"`` and ``"prompt"`` keys):

    1. For a fixed set of tasks, a reference URL is removed from the prompt.
    2. A fixed set of tasks is deleted from the list.

    Every edit is preceded by ``check_id`` so that a hard-coded index is only
    used when it still points at the expected task.

    Args:
        data: Mutable list of task mappings; it is modified in place.

    Returns:
        The same ``data`` list after both passes.
    """
    # fix: https://github.com/evalplus/evalplus/issues/156
    # Each row is (index, expected task id, URL to strip from the prompt).
    url_removals = (
        (
            334,
            "Mbpp/734",
            "https://www.geeksforgeeks.org/sum-of-products-of-all-possible-subarrays/",
        ),
        (
            335,
            "Mbpp/735",
            "https://www.geeksforgeeks.org/toggle-bits-number-expect-first-last-bits/",
        ),
        (
            336,
            "Mbpp/736",
            "https://www.w3resource.com/python-exercises/data-structures-and-algorithms/python-data-structure-exercise-24.php",
        ),
        (
            338,
            "Mbpp/739",
            "https://www.geeksforgeeks.org/index-of-smallest-triangular-number-with-n-digits/",
        ),
        (
            339,
            "Mbpp/740",
            "https://www.geeksforgeeks.org/python-convert-tuple-to-adjacent-pair-dictionary/",
        ),
        (
            342,
            "Mbpp/743",
            "https://www.geeksforgeeks.org/python-program-right-rotate-list-n/",
        ),
        (
            344,
            "Mbpp/745",
            "https://www.w3resource.com/python-exercises/lambda/python-lambda-exercise-24.php",
        ),
        (
            347,
            "Mbpp/749",
            "https://www.geeksforgeeks.org/python-sort-numeric-strings-in-a-list/",
        ),
        (
            349,
            "Mbpp/751",
            "https://www.geeksforgeeks.org/how-to-check-if-a-given-array-represents-a-binary-heap/",
        ),
        (
            350,
            "Mbpp/752",
            "https://www.geeksforgeeks.org/jacobsthal-and-jacobsthal-lucas-numbers/",
        ),
        (
            351,
            "Mbpp/753",
            "https://www.geeksforgeeks.org/python-find-minimum-k-records-from-tuple-list/",
        ),
        (
            354,
            "Mbpp/757",
            "https://www.geeksforgeeks.org/python-program-to-count-the-pairs-of-reverse-strings/",
        ),
        (
            359,
            "Mbpp/763",
            "https://www.geeksforgeeks.org/find-minimum-difference-pair/",
        ),
        (
            366,
            "Mbpp/771",
            "https://www.geeksforgeeks.org/check-for-balanced-parentheses-in-an-expression/",
        ),
        (
            372,
            "Mbpp/780",
            "https://www.geeksforgeeks.org/python-combinations-of-sum-with-tuples-in-tuple-list/",
        ),
        (
            373,
            "Mbpp/781",
            "https://www.w3resource.com/python-exercises/basic/python-basic-1-exercise-24.php",
        ),
        (
            374,
            "Mbpp/782",
            "https://www.geeksforgeeks.org/sum-of-all-odd-length-subarrays/",
        ),
        (
            392,
            "Mbpp/803",
            "https://www.geeksforgeeks.org/check-if-given-number-is-perfect-square-in-cpp/",
        ),
    )
    for position, expected_id, url in url_removals:
        check_id(data, position, expected_id)
        task = data[position]
        task["prompt"] = task["prompt"].replace(url, "")

    # fix: https://github.com/evalplus/evalplus/issues/147
    # Each row is (index, expected task id). Indices are listed from highest
    # to lowest, so deleting one entry never shifts the entries still to go.
    task_removals = (
        (375, "Mbpp/783"),
        (345, "Mbpp/746"),
        (318, "Mbpp/640"),
        (282, "Mbpp/595"),
        (270, "Mbpp/582"),
        (263, "Mbpp/574"),
        (231, "Mbpp/461"),
        (216, "Mbpp/442"),
        (212, "Mbpp/438"),
        (206, "Mbpp/431"),
        (187, "Mbpp/407"),
        (183, "Mbpp/400"),
        (180, "Mbpp/396"),
        (160, "Mbpp/295"),
        (121, "Mbpp/249"),
        (107, "Mbpp/229"),
        (94, "Mbpp/164"),
        (89, "Mbpp/143"),
        (67, "Mbpp/117"),
        (65, "Mbpp/115"),
        (37, "Mbpp/83"),
    )
    for position, expected_id in task_removals:
        check_id(data, position, expected_id)
        del data[position]

    return data
if __name__ == "__main__":
    import json

    # Task ids whose prompts are printed after the rewrite for manual review.
    TASK_INSPECT = [
        "Mbpp/734",
        "Mbpp/735",
        "Mbpp/736",
        "Mbpp/739",
        "Mbpp/740",
        "Mbpp/743",
        "Mbpp/745",
        "Mbpp/749",
        "Mbpp/751",
        "Mbpp/752",
        "Mbpp/753",
        "Mbpp/757",
        "Mbpp/763",
        "Mbpp/771",
        "Mbpp/780",
        "Mbpp/781",
        "Mbpp/782",
        "Mbpp/803",
    ]
    SOURCE_VERSION = "v0.1.0"
    TARGET_VERSION = "v0.2.0"

    def _read_jsonl(path):
        """Load a JSON-Lines file into a list, skipping blank lines."""
        with open(path, encoding="utf-8") as f:
            # Lines keep their trailing newline, so a plain truthiness test
            # never filters anything; strip() is what detects a blank line.
            return [json.loads(line) for line in f if line.strip()]

    def evolve(src_file, tgt_file):
        """Read ``src_file``, apply ``fix`` and write the result to ``tgt_file``."""
        data = fix(_read_jsonl(src_file))
        # Binary mode: bytes are written as-is, with no newline translation.
        with open(tgt_file, "wb") as f:
            for x in data:
                f.write((json.dumps(x) + "\n").encode("utf-8"))

    evolve(f"MbppPlus-{SOURCE_VERSION}.jsonl", f"MbppPlus-{TARGET_VERSION}.jsonl")

    # Inspect the output of jsonl
    data = {x["task_id"]: x for x in _read_jsonl(f"MbppPlus-{TARGET_VERSION}.jsonl")}
    for task_id in TASK_INSPECT:
        print(data[task_id]["prompt"])
        print("====================================")