Tsukihjy's picture
download
raw
4.04 kB
from config import cfg
import sys
sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils")
from dataset_all import get_datasets_by_name
def see_all(num_batches=9, batch_size=100):
    """Print how many dataset items already have a tests file, batch by batch.

    The dataset named by ``cfg.dataset_name`` is cut into consecutive slices
    of ``batch_size`` items.  For each of the first ``num_batches`` slices the
    number of items whose ``cfg.tests_path.format(item['tcb_id'])`` exists on
    disk is printed, followed by the grand total.

    Args:
        num_batches: Number of consecutive batches to scan (default 9).
        batch_size: Number of dataset items per batch (default 100).
    """
    # Function-scope import: the file has no module-level ``import os``.
    import os

    # Load once instead of once per batch; assumes get_datasets_by_name
    # returns the same data on every call -- TODO confirm.
    dataset = get_datasets_by_name(cfg.dataset_name)

    all_count = 0
    for batch in range(num_batches):
        start_pos = batch * batch_size
        # Clamp the final batch to the end of the dataset.
        end_pos = min((batch + 1) * batch_size, len(dataset))
        batch_items = dataset[start_pos:end_pos]

        total_count = sum(
            1
            for item in batch_items
            if os.path.exists(cfg.tests_path.format(item['tcb_id']))
        )
        all_count += total_count
        print(total_count)
    print(f"共计:{all_count}")
def find_index(batch, tcb_id):
    """Print the dataset size and the position of the item matching ``tcb_id``.

    The dataset named by ``cfg.dataset_name`` is scanned from the start and
    the number of items preceding the first match is printed.  When nothing
    matches, the printed number equals the number of items iterated.

    Args:
        batch: Unused by the lookup; kept so existing callers keep working.
        tcb_id: Value compared against each item's ``'tcb_id'`` field.

    Returns:
        The zero-based position of the first matching item, or ``None`` when
        no item matches.
    """
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    print(f"数据集大小{len(al_dataset)}")

    position = 0
    found = False
    for item in al_dataset:
        if tcb_id == item['tcb_id']:
            found = True
            break
        position += 1

    print(position)
    # The printed value alone cannot distinguish "not found" from a match,
    # so the outcome is also returned explicitly.
    return position if found else None
# Executed whenever this file runs or is imported: print a progress header,
# then the per-batch counts of existing tests files and their total.
print("执行进度")
see_all()
def find_unexist(num_batches=8, batch_size=100):
    """Print the ``tcb_id`` of every scanned item that has no tests file yet.

    The dataset named by ``cfg.dataset_name`` is cut into consecutive slices
    of ``batch_size`` items.  Within the first ``num_batches`` slices, each
    item whose ``cfg.tests_path.format(item['tcb_id'])`` does not exist on
    disk has its ``tcb_id`` printed; the number of such items is printed last.

    Args:
        num_batches: Number of consecutive batches to scan (default 8).
        batch_size: Number of dataset items per batch (default 100).
    """
    # Function-scope import: the file has no module-level ``import os``.
    import os

    # Load once instead of once per batch; assumes get_datasets_by_name
    # returns the same data on every call -- TODO confirm.
    dataset = get_datasets_by_name(cfg.dataset_name)

    all_count = 0
    for batch in range(num_batches):
        start_pos = batch * batch_size
        # Clamp the final batch to the end of the dataset.
        end_pos = min((batch + 1) * batch_size, len(dataset))
        for item in dataset[start_pos:end_pos]:
            if not os.path.exists(cfg.tests_path.format(item['tcb_id'])):
                print(item['tcb_id'])
                all_count += 1
    print(f"共计:{all_count}")
# Look up where one specific problem sits in the dataset, by its tcb_id.
tcb = "数组游戏"
print(f"寻找idx {tcb}")
# The first argument (batch) is not used by find_index's lookup.
find_index(2, tcb)
# find_unexist()
# import os
# import json
# def calculate_average_samples(directory):
# """
# 读取指定目录下所有的jsonl文件,计算平均的样本个数。
# 参数:
# directory: 文件夹路径,包含 .jsonl 文件。
# 返回:
# float: 所有文件中的平均样本个数。
# """
# sample_counts = []
# # 遍历目录中的所有文件
# for filename in os.listdir(directory):
# if filename.endswith('.jsonl') and "test_pass_rate" not in filename:
# file_path = os.path.join(directory, filename)
# # 读取每个jsonl文件并统计样本个数
# with open(file_path, 'r', encoding='utf-8') as file:
# lines = file.readlines()
# sample_counts.append(len(lines)) # 每个文件的行数即为样本数
# # 计算并返回平均值
# if sample_counts:
# print(f"平均testcase个数: {sum(sample_counts) / 731}")
# print(f"平均testcase个数(不计入生成0个的题目): {sum(sample_counts) / len(sample_counts)}")
# return sum(sample_counts) / 731
# else:
# return 0.0
# # 示例用法
# directory = '/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/predo/' # 替换为你的文件夹路径
# average_samples = calculate_average_samples(directory)
# # print(f"平均样本个数: {average_samples}")
# def calculate_average_pass(directory):
# all_gen = 0
# right = 0
# # 遍历目录中的所有文件
# for filename in os.listdir(directory):
# if filename.endswith('.json') and "test_pass_rate" in filename:
# file_path = os.path.join(directory, filename)
# # Read each test_pass_rate file line by line and accumulate gen_nums / right_nums
# with open(file_path, 'r', encoding='utf-8') as file:
# for line in file.readlines():
# item = json.loads(line)
# all_gen += item['gen_nums']
# right += item['right_nums']
# # 计算并返回平均值
# return right / all_gen
# print(f"pass rate {calculate_average_pass(directory)}")

Xet Storage Details

Size:
4.04 kB
·
Xet hash:
83fec1dc17e1719006f13489616e35074dc6254d2e06ac90e1a29673676b4a97

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.