"""Batch-convert LDR files under a folder into one compressed ``.npz`` archive."""

import os

import numpy as np

from process_single_ldr import process_ldr_data  # per-file LDR processing routine

# Files with this many lines or more are skipped (matches the "300" in the
# output archive name).
MAX_LINES_PER_FILE = 300


def process_all_ldr_in_folder(folder_path, output_folder):
    """Process every eligible LDR file under ``folder_path`` and save the results.

    The directory tree rooted at ``folder_path`` is walked recursively. A file
    is eligible when its name starts with ``modified`` and ends with ``.ldr``.
    Eligible files with ``MAX_LINES_PER_FILE`` or more lines are reported as
    skipped and are not processed. Every remaining file is read line by line
    and handed to ``process_ldr_data``; the first element of the returned pair
    is collected.

    The collected results are written to
    ``<output_folder>/all_ldr_data_300_ronehot24.npz`` with two entries:
    ``data`` (an object-dtype array built from the per-file results) and
    ``filenames`` (base names of the processed files, in the same order).

    Args:
        folder_path: Root directory to search for LDR files.
        output_folder: Existing directory in which the archive is written.
    """
    all_data = []
    filenames = []  # base names of the files that were actually processed
    line_count = 0  # total number of lines across processed files

    # Walk the whole tree, including nested sub-folders.
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # Only handle "modified*.ldr" files.
            if not (file.endswith('.ldr') and file.startswith('modified')):
                continue

            file_path = os.path.join(root, file)
            with open(file_path, 'r') as f:
                lines = f.readlines()

            if len(lines) >= MAX_LINES_PER_FILE:
                print(f"Skipped {file} due to exceeding line limit.")
                # Actually skip the file: previously the message was printed
                # but the oversized file was still processed and stored.
                continue

            line_count += len(lines)
            # process_ldr_data returns a pair; only the first element is kept.
            data, _ = process_ldr_data(lines)
            all_data.append(data)
            filenames.append(file)
            print(f"Processed {file}")

    print(f"Total lines processed: {line_count}")

    output_file = os.path.join(output_folder, "all_ldr_data_300_ronehot24.npz")
    np.savez_compressed(
        output_file,
        data=np.array(all_data, dtype=object),  # keep per-file results as objects
        filenames=filenames,  # stored alongside the data for later lookup
    )
    print(f"All LDR data have been processed and saved to {output_file}")


# Script entry point.
if __name__ == "__main__":
    input_folder = '/public/home/wangshuo/gap/assembly/data'  # input folder path
    output_folder = '/public/home/wangshuo/gap/assembly/data/processed_data'  # output folder path
    os.makedirs(output_folder, exist_ok=True)  # make sure the output folder exists

    # Process all LDR files and save them as a single npz archive.
    process_all_ldr_in_folder(input_folder, output_folder)