| import json | |
| import os | |
| import numpy as np | |
def load_all_wrong_code_subset(
    name="tcb",
    prefix_dir=None,
    testcase_alg="",
    subset_path=(
        "/home/luoxianzhen/yang/data/Ours/all_wrong_code/data/"
        "all_wrong_code_subset.json"
    ),
    output_path=(
        "/home/luoxianzhen/yang/data/add_experience/"
        "wrong_code_distribute.json"
    ),
):
    """Count 'W' results per output position for every problem and save them.

    For each record in the subset file, every ``output_str`` of its
    ``all_wrng_code`` entries is turned into a 0/1 row (1 where the character
    is ``'W'``). The rows are summed column-wise, so position ``i`` of the
    result is the number of entries whose ``output_str`` has ``'W'`` at index
    ``i``. (Presumably each position is one test case and ``'W'`` marks a
    wrong answer -- confirm against the data producer.)

    Args:
        name: Unused; kept so existing callers keep working.
        prefix_dir: Unused; kept so existing callers keep working.
        testcase_alg: Unused; kept so existing callers keep working.
        subset_path: JSON file holding a list of records, each with a
            ``"name"`` key and an ``"all_wrng_code"`` list of dicts that
            carry an ``"output_str"`` string.
        output_path: Destination JSON file for the resulting mapping.

    Returns:
        dict mapping each record's ``name`` to a comma-separated string of
        per-position ``'W'`` counts. A record with no entries maps to ``""``.

    Note:
        All ``output_str`` values of one record are assumed to have the same
        length; ragged rows are not supported by the column-wise sum.
    """
    with open(subset_path, "r", encoding="utf-8") as src:
        all_wrong_code = json.load(src)

    w_distributions = {}
    for item in all_wrong_code:
        matrix = [
            [1 if ch == "W" else 0 for ch in code["output_str"]]
            for code in item["all_wrng_code"]
        ]
        # np.sum([], axis=0) yields the scalar 0.0, which cannot be iterated
        # by join(); treat "no entries" as an empty distribution instead.
        col_sum = np.sum(matrix, axis=0) if matrix else []
        w_distributions[item["name"]] = ",".join(map(str, col_sum))

    # A context manager guarantees the file is flushed and closed before return.
    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(w_distributions, dst, indent=4, ensure_ascii=False)
    return w_distributions
if __name__ == "__main__":
    # Script entry point: regenerate the per-problem 'W' distribution JSON.
    # The path is a plain string; the original f-prefix had no placeholders.
    data = load_all_wrong_code_subset(
        name="tcb",
        prefix_dir="/home/luoxianzhen/yang/save_tests_gpt-4o-type/lcb/",
        testcase_alg="algo",
    )
Xet Storage Details
- Size: 1.24 kB
- Xet hash: af9b11e66b1a25ec0bb5d9ed2922fa927da3cae153d945cfe5ed5d4d7a1a75d9
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.