| import sys | |
| sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils") | |
| from dataset_all import get_datasets_by_name | |
| from config import cfg | |
| import os | |
# Measure what fraction of wrong-code samples still lack a generated algo file.
dataset = get_datasets_by_name("ours")
total_wrong_code = 0
empty = 0
for item in dataset:
    tcb_id = item['tcb_id']
    total_wrong_code += len(item['wrong_code'])
    # A missing file at cfg.file_algo.format(tcb_id) means nothing was generated yet.
    if not os.path.exists(cfg.file_algo.format(tcb_id)):
        empty += len(item['wrong_code'])
# Guard against an empty dataset (or items with no wrong_code) to avoid ZeroDivisionError.
if total_wrong_code:
    print(f"empty {(empty / total_wrong_code) * 100:.2f}%")
else:
    print("empty 0.00%")
| from config import cfg | |
| import sys | |
| sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils") | |
| from dataset_all import get_datasets_by_name | |
def see_all():
    """Print, for each of 8 batches of 100 dataset items, how many items
    already have a generated algo file, then print the grand total.
    """
    # NOTE(review): this uses the hard-coded dataset name "ours" while the
    # sibling helpers use cfg.dataset_name — confirm which is intended.
    al_dataset = get_datasets_by_name("ours")  # load once, not once per batch
    all_count = 0
    for batch in range(8):
        start_pos = batch * 100
        end_pos = min((batch + 1) * 100, len(al_dataset))
        done_in_batch = 0
        for item in al_dataset[start_pos:end_pos]:
            # An existing file at cfg.file_algo.format(tcb_id) marks the item as done.
            if os.path.exists(cfg.file_algo.format(item['tcb_id'])):
                done_in_batch += 1
                all_count += 1
        print(done_in_batch)
    print(f"共计:{all_count}")
def find_index(batch, tcb_id):
    """Print and return the 0-based position of *tcb_id* in the dataset.

    Args:
        batch: accepted for interface compatibility but currently unused —
            the batched slicing it once drove was commented out.
        tcb_id: identifier to search for in the dataset items.

    Returns:
        The index of the first matching item, or len(dataset) if not found.
    """
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    index = 0
    for item in al_dataset:
        if tcb_id == item['tcb_id']:
            break
        index += 1
    print(index)
    return index
# Top-level entry point: report generation progress over all batches.
print("执行进度")
see_all()
def find_unexist():
    """Print the tcb_id of every dataset item that still lacks a generated
    algo file, then print the total count of such items.
    """
    al_dataset = get_datasets_by_name(cfg.dataset_name)  # load once, not once per batch
    all_count = 0
    for batch in range(8):
        start_pos = batch * 100
        end_pos = min((batch + 1) * 100, len(al_dataset))
        for item in al_dataset[start_pos:end_pos]:
            # No file at cfg.file_algo.format(tcb_id) means this item is unfinished.
            if not os.path.exists(cfg.file_algo.format(item['tcb_id'])):
                print(item['tcb_id'])
                all_count += 1
    print(f"共计:{all_count}")
| # tcb = "毒瘤题加强版" | |
| # print(f"寻找idx {tcb}") | |
| # find_index(7, tcb) | |
| # find_unexist() | |
| import json | |
| # 读取多个jsonl文件并合并为一个数组 | |
def read_jsonl_files(num_batches=8,
                     path_template="/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/predo/test_pass_rate-{}.json"):
    """Read several JSON-Lines result files and merge them into one list.

    Args:
        num_batches: how many batch files to read (files 0..num_batches-1).
            Defaults to 8, matching the original hard-coded behavior.
        path_template: a format string with one ``{}`` placeholder for the
            batch index. Defaults to the original hard-coded path.

    Returns:
        A list of dicts, one per JSON line, in file order.
    """
    all_data = []
    for i in range(num_batches):
        with open(path_template.format(i), 'r', encoding='utf-8') as f:
            for line in f:
                # Each line is a standalone JSON document (JSON-Lines format).
                all_data.append(json.loads(line))
    return all_data
| # 计算 right_num != 0 的占比 | |
def calculate_right_num_percentage(data):
    """Return the fraction of records whose 'right_num' field is non-zero.

    Records missing the 'right_num' key count as zero. Returns 0 for an
    empty input.
    """
    if not data:
        return 0
    non_zero = [record for record in data if record.get('right_num', 0) != 0]
    return len(non_zero) / len(data)
| # # 读取文件并合并 | |
| # all_data = read_jsonl_files() | |
| # # 计算 right_num != 0 的占比 | |
| # percentage = calculate_right_num_percentage(all_data) | |
| # print(f"The percentage of items with right_num != 0 is: {percentage:.2%}") | |
Xet Storage Details
- Size:
- 3.23 kB
- Xet hash:
- e439d106ece6d574cb5d7f12dc658af87d258339af7554bc373c4418516e7490
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.