Tsukihjy's picture
download
raw
3.23 kB
import sys
sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils")
from dataset_all import get_datasets_by_name
from config import cfg
import os

# Report what fraction of wrong-code samples belong to items whose
# generated algo file does not exist yet.
dataset = get_datasets_by_name("ours")
total_wrong_code = 0
empty = 0
for item in dataset:
    wrong_count = len(item['wrong_code'])
    total_wrong_code += wrong_count
    if not os.path.exists(cfg.file_algo.format(item['tcb_id'])):
        empty += wrong_count
print(f"empty {(empty / total_wrong_code) * 100:.2f}%")
from config import cfg
import sys
sys.path.append("/home/i-luoxianzhen/data/TestCase-Gen/methods/utils")
from dataset_all import get_datasets_by_name
def see_all(num_batches=8, batch_size=100):
    """Print, per batch, how many items already have a generated algo file,
    then print the grand total.

    Args:
        num_batches: number of batches to scan (default 8, the original value).
        batch_size: items per batch (default 100, the original value).

    Side effects: prints one count per batch and a final "共计:<n>" line.
    """
    import os
    # NOTE(review): this uses the literal "ours" while find_index/find_unexist
    # use cfg.dataset_name — presumably the same dataset; confirm.
    # The dataset is loop-invariant, so load it once instead of once per batch.
    al_dataset = get_datasets_by_name("ours")
    all_count = 0
    for batch in range(num_batches):
        start_pos = batch * batch_size
        end_pos = min((batch + 1) * batch_size, len(al_dataset))
        total_count = 0
        for item in al_dataset[start_pos:end_pos]:
            if os.path.exists(cfg.file_algo.format(item['tcb_id'])):
                total_count += 1
                all_count += 1
        print(total_count)
    print(f"共计:{all_count}")
def find_index(batch, tcb_id):
    """Print (and return) the position of *tcb_id* in the dataset.

    Args:
        batch: unused; kept so existing call sites remain valid.
        tcb_id: the id to search for.

    Returns:
        The 0-based index of the first matching item, or the dataset length
        if no item matches (same value the original printed in that case).
    """
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    index = 0
    for item in al_dataset:
        if tcb_id == item['tcb_id']:
            break
        index += 1
    print(index)
    return index
# Report current execution progress across all batches.
print("执行进度")
see_all()
def find_unexist(num_batches=8, batch_size=100):
    """Print the tcb_id of every item whose algo file is missing, then a total.

    Args:
        num_batches: number of batches to scan (default 8, the original value).
        batch_size: items per batch (default 100, the original value).

    Side effects: prints each missing tcb_id and a final "共计:<n>" line.
    """
    import os
    # The dataset is loop-invariant, so load it once instead of once per batch.
    al_dataset = get_datasets_by_name(cfg.dataset_name)
    all_count = 0
    for batch in range(num_batches):
        start_pos = batch * batch_size
        end_pos = min((batch + 1) * batch_size, len(al_dataset))
        for item in al_dataset[start_pos:end_pos]:
            if not os.path.exists(cfg.file_algo.format(item['tcb_id'])):
                print(item['tcb_id'])
                all_count += 1
    print(f"共计:{all_count}")
# tcb = "毒瘤题加强版"
# print(f"寻找idx {tcb}")
# find_index(7, tcb)
# find_unexist()
import json
# 读取多个jsonl文件并合并为一个数组
def read_jsonl_files(
    path_template="/home/i-luoxianzhen/data/TestCase-Gen/saved_tests/predo/test_pass_rate-{}.json",
    count=8,
):
    """Read several JSON-Lines shard files and merge their records into one list.

    Args:
        path_template: path with a ``{}`` placeholder for the shard index
            (default preserves the original hard-coded path).
        count: number of shards, indexed 0..count-1 (default 8).

    Returns:
        A list of dicts, one per non-blank line, in shard order.
    """
    all_data = []
    for i in range(count):
        with open(path_template.format(i), 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # Skip blank lines (e.g. a trailing newline) instead of
                # crashing in json.loads("").
                if line:
                    all_data.append(json.loads(line))
    return all_data
# 计算 right_num != 0 的占比
def calculate_right_num_percentage(data):
    """Return the fraction of records whose 'right_num' field is non-zero.

    Records without a 'right_num' key count as zero. Returns 0 for an
    empty input instead of dividing by zero.
    """
    if not data:
        return 0
    non_zero = [item for item in data if item.get('right_num', 0) != 0]
    return len(non_zero) / len(data)
# # 读取文件并合并
# all_data = read_jsonl_files()
# # 计算 right_num != 0 的占比
# percentage = calculate_right_num_percentage(all_data)
# print(f"The percentage of items with right_num != 0 is: {percentage:.2%}")

Xet Storage Details

Size:
3.23 kB
·
Xet hash:
e439d106ece6d574cb5d7f12dc658af87d258339af7554bc373c4418516e7490

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.