import os

from datasets import load_from_disk, concatenate_datasets
def load_and_merge_datasets(directories):
    """Load a dataset from each directory and concatenate them into one.

    Args:
        directories: Iterable of paths, each passed to ``load_from_disk``.

    Returns:
        The result of ``concatenate_datasets`` applied to the loaded
        datasets, in the same order as ``directories``.
    """
    # Load every part first; concatenation needs the full list at once.
    parts = [load_from_disk(path) for path in directories]
    return concatenate_datasets(parts)
# On-disk locations of the dataset parts to be merged, in upload order.
data_directories = [
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part0",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part1",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part2",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part3",
]

# Merge all parts into a single dataset, then publish it to the Hub repo.
merged_dataset = load_and_merge_datasets(data_directories)
merged_dataset.push_to_hub("CNX-PathLLM/TCGA-WSI-Text")