| from config import cfg | |
| import sys | |
| sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils") | |
| from dataset_all import get_datasets_by_name | |
def see_all():
    """Report generation progress: per 100-item batch, print how many dataset
    items already have a test file on disk, then print the grand total.

    Side effects only (prints to stdout). Relies on ``cfg.dataset_name`` /
    ``cfg.tests_path`` and the project dataset loader ``get_datasets_by_name``.
    """
    import os

    # Load the dataset once; the original reloaded it on every batch iteration.
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    batch_size = 100
    all_count = 0
    # Walk the whole dataset in 100-item chunks instead of a hard-coded
    # 9 batches (which printed a spurious trailing 0 for short datasets).
    for start_pos in range(0, len(al_dataset), batch_size):
        batch_items = al_dataset[start_pos:start_pos + batch_size]
        batch_count = 0
        for item in batch_items:
            # A test file exists for this item iff its per-id path is on disk.
            if os.path.exists(cfg.tests_path.format(item['tcb_id'])):
                batch_count += 1
                all_count += 1
        print(batch_count)
    print(f"共计:{all_count}")
def find_index(batch, tcb_id):
    """Print the 0-based position of *tcb_id* in the configured dataset,
    then report overall generation progress via ``see_all()``.

    Args:
        batch: Unused; kept for backward compatibility with existing callers
            (it selected a 100-item slice in the now-commented-out variant).
        tcb_id: Identifier matched against each item's ``'tcb_id'`` field.

    Side effects only (prints). NOTE: when *tcb_id* is absent the loop runs
    to completion and the dataset length is printed as the "index" — same as
    the original behavior, now made explicit with a warning line.
    """
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    print(f"数据集大小{len(al_dataset)}")
    # Scan for the first matching item; `index` counts items before the match.
    index = 0
    found = False
    for item in al_dataset:
        if tcb_id == item['tcb_id']:
            found = True
            break
        index += 1
    if not found:
        # Preserve the original printed value, but flag the miss.
        print(f"warning: '{tcb_id}' not found in dataset")
    print(index)
    print("执行进度")
    see_all()
def find_unexist():
    """Print the ``tcb_id`` of every dataset item whose generated test file is
    missing on disk, followed by the total number of missing items.

    Side effects only (prints). Relies on ``cfg.dataset_name`` /
    ``cfg.tests_path`` and the project dataset loader.
    """
    import os

    # Load once and scan the full dataset. The original reloaded the dataset
    # for each of 8 hard-coded batches and silently skipped any item beyond
    # index 799.
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    all_count = 0
    for item in al_dataset:
        if not os.path.exists(cfg.tests_path.format(item['tcb_id'])):
            print(item['tcb_id'])
            all_count += 1
    print(f"共计:{all_count}")
| tcb = "数组游戏" | |
| print(f"寻找idx {tcb}") | |
| find_index(2, tcb) | |
| # find_unexist() | |
| # import os | |
| # import json | |
| # def calculate_average_samples(directory): | |
| # """ | |
| # 读取指定目录下所有的jsonl文件,计算平均的样本个数。 | |
| # 参数: | |
| # directory: 文件夹路径,包含 .jsonl 文件。 | |
| # 返回: | |
| # float: 所有文件中的平均样本个数。 | |
| # """ | |
| # sample_counts = [] | |
| # # 遍历目录中的所有文件 | |
| # for filename in os.listdir(directory): | |
| # if filename.endswith('.jsonl') and "test_pass_rate" not in filename: | |
| # file_path = os.path.join(directory, filename) | |
| # # 读取每个jsonl文件并统计样本个数 | |
| # with open(file_path, 'r', encoding='utf-8') as file: | |
| # lines = file.readlines() | |
| # sample_counts.append(len(lines)) # 每个文件的行数即为样本数 | |
| # # 计算并返回平均值 | |
| # if sample_counts: | |
| # print(f"平均testcase个数: {sum(sample_counts) / 731}") | |
| # print(f"平均testcase个数(不计入生成0个的题目): {sum(sample_counts) / len(sample_counts)}") | |
| # return sum(sample_counts) / 731 | |
| # else: | |
| # return 0.0 | |
| # # 示例用法 | |
| # directory = '/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/predo/' # 替换为你的文件夹路径 | |
| # average_samples = calculate_average_samples(directory) | |
| # # print(f"平均样本个数: {average_samples}") | |
| # def calculate_average_pass(directory): | |
| # all_gen = 0 | |
| # right = 0 | |
| # # 遍历目录中的所有文件 | |
| # for filename in os.listdir(directory): | |
| # if filename.endswith('.json') and "test_pass_rate" in filename: | |
| # file_path = os.path.join(directory, filename) | |
| # # 读取每个jsonl文件并统计样本个数 | |
| # with open(file_path, 'r', encoding='utf-8') as file: | |
| # for line in file.readlines(): | |
| # item = json.loads(line) | |
| # all_gen += item['gen_nums'] | |
| # right += item['right_nums'] | |
| # # 计算并返回平均值 | |
| # return right / all_gen | |
| # print(f"pass rate {calculate_average_pass(directory)}") |
Xet Storage Details
- Size:
- 4.04 kB
- Xet hash:
- 83fec1dc17e1719006f13489616e35074dc6254d2e06ac90e1a29673676b4a97
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.