| import os |
| import numpy as np |
| from tqdm import tqdm |
| from concurrent.futures import ProcessPoolExecutor, as_completed |
|
|
| |
# Directory that is scanned for cached ``.npz`` files.
cache_dir = "/gemini/user/private/zhaotianhao/dataset_cache/MERGED_DATASET_count_200_2000_100000_128to1024_819200"

# Edge filter: the array stored under ``combined_voxels_{target_res_edge}``
# must have a length within [min_edge_voxels, max_edge_voxels] (inclusive).
target_res_edge = 512
min_edge_voxels = 2000
max_edge_voxels = 75000

# Active filter: the array stored under ``active_voxels_{target_res_active}``
# must have a length within [min_active_voxels, max_active_voxels] (inclusive).
target_res_active = 128
min_active_voxels = 2000
max_active_voxels = 326780

# Output list of accepted sample names; both ranges are encoded in the
# file name so that runs with different thresholds do not overwrite each other.
save_txt_path = (
    "/gemini/user/private/zhaotianhao/Triposf/MERGED_DATASET_filtered_"
    f"{min_edge_voxels}-{max_edge_voxels}edge_"
    f"{min_active_voxels}-{max_active_voxels}active.txt"
)
|
|
| |
def check_voxel_counts(npz_path):
    """Check whether one cached ``.npz`` file passes both voxel-count filters.

    The lengths of the arrays stored under
    ``combined_voxels_{target_res_edge}`` and
    ``active_voxels_{target_res_active}`` are compared against the
    module-level inclusive bounds ``min/max_edge_voxels`` and
    ``min/max_active_voxels``.

    Args:
        npz_path: Path of the ``.npz`` file to inspect.

    Returns:
        ``(original_name, count_edge, count_active)`` when both counts are in
        range, where ``original_name`` is the file name without its trailing
        ``_precombined.npz`` (or, failing that, without its extension).
        ``None`` when a key is missing, a count is out of range, or the file
        cannot be read.
    """
    key_edge = f"combined_voxels_{target_res_edge}"
    key_active = f"active_voxels_{target_res_active}"

    try:
        with np.load(npz_path) as data:
            if key_edge not in data or key_active not in data:
                return None
            count_edge = len(data[key_edge])
            count_active = len(data[key_active])
    except Exception:
        # Deliberate best-effort: an unreadable or malformed cache file is
        # treated as "filtered out" so one bad file cannot abort the scan.
        return None

    if not min_edge_voxels <= count_edge <= max_edge_voxels:
        return None
    if not min_active_voxels <= count_active <= max_active_voxels:
        return None

    base_name = os.path.basename(npz_path)
    suffix = "_precombined.npz"
    if base_name.endswith(suffix):
        # Slice off only the trailing suffix; str.replace would also remove
        # any earlier occurrence of the same text inside the name.
        original_name = base_name[: -len(suffix)]
    else:
        original_name = os.path.splitext(base_name)[0]

    return (original_name, count_edge, count_active)
|
|
| |
def main():
    """Filter every ``.npz`` file in ``cache_dir`` and save the accepted names.

    Each file is checked in a worker process by ``check_voxel_counts``. The
    accepted names are written one per line to ``save_txt_path``, then
    min/max/mean statistics of the accepted counts are printed.

    Raises:
        SystemExit: With status 1 when ``cache_dir`` does not exist.
    """
    if not os.path.exists(cache_dir):
        print(f"错误: 缓存目录不存在 {cache_dir}")
        # Non-zero status so calling shells/pipelines can detect the failure.
        raise SystemExit(1)

    npz_files = [
        os.path.join(cache_dir, entry)
        for entry in os.listdir(cache_dir)
        if entry.endswith(".npz")
    ]
    total = len(npz_files)
    print(f"共发现 {total} 个缓存文件。开始并行过滤...")
    print("筛选条件:")
    # Labels are derived from the configured resolutions so they cannot drift
    # from the keys that are actually checked.
    print(f" - Edge ({target_res_edge}): {min_edge_voxels} ~ {max_edge_voxels}")
    print(f" - Active ({target_res_active}): {min_active_voxels} ~ {max_active_voxels}")

    accepted = []
    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = [executor.submit(check_voxel_counts, path) for path in npz_files]
        for future in tqdm(as_completed(futures), total=total, desc="Filtering"):
            result = future.result()
            if result is not None:
                accepted.append(result)

    # Completion order is nondeterministic; sort by name so repeated runs
    # produce an identical output file.
    accepted.sort()
    filtered_files = [name for name, _, _ in accepted]
    counts_edge = [c_edge for _, c_edge, _ in accepted]
    counts_active = [c_active for _, _, c_active in accepted]

    out_dir = os.path.dirname(save_txt_path)
    if out_dir:  # os.makedirs("") raises, so skip it for a bare file name
        os.makedirs(out_dir, exist_ok=True)
    with open(save_txt_path, "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in filtered_files)

    # Guard against an empty cache directory (would divide by zero).
    keep_rate = len(filtered_files) / total * 100 if total else 0.0
    print("\n✅ 筛选完成:")
    print(f" 符合条件的文件数: {len(filtered_files)} / {total} (保留率: {keep_rate:.2f}%)")

    if counts_edge:
        print(f"\n[统计 - Edge Voxels ({target_res_edge})]")
        print(f" 最小值: {min(counts_edge)}")
        print(f" 最大值: {max(counts_edge)}")
        print(f" 平均值: {np.mean(counts_edge):.2f}")

    if counts_active:
        print(f"\n[统计 - Active Voxels ({target_res_active})]")
        print(f" 最小值: {min(counts_active)}")
        print(f" 最大值: {max(counts_active)}")
        print(f" 平均值: {np.mean(counts_active):.2f}")

    print(f"\n 结果已保存到: {save_txt_path}")


# Worker processes started with the "spawn" method re-import this module;
# the guard keeps them from re-running the whole scan themselves.
if __name__ == "__main__":
    main()