Spaces:
Build error
Build error
| from sklearn.model_selection import train_test_split | |
| from glob import glob | |
| import shutil | |
| import os | |
def split_data_from_dir(path: str, new_path: str, test_size: float = 0.2, valid_size: float = 0.2, force_placement: bool = True) -> None:
    """Copy a class-per-folder dataset into train/test/valid sub-trees.

    Every sub-directory of ``path`` is treated as one class. Its files are
    randomly split with ``train_test_split`` and copied to
    ``new_path/train/<class>``, ``new_path/test/<class>`` and
    ``new_path/valid/<class>``. Entries of ``path`` that are not directories
    are ignored, and so are sub-directories found inside a class directory.

    Args:
        path: Existing directory holding one sub-directory per class.
        new_path: Existing directory that receives the three sub-trees.
        test_size: Fraction of each class copied to the test set; must be
            in the open interval (0, 0.5).
        valid_size: Fraction of each class copied to the validation set;
            must be in [0, 0.5). With 0 the validation directory is still
            created but stays empty.
        force_placement: When True, destination class directories that
            already exist are reused. When False, an existing destination
            class directory is an error.

    Raises:
        AssertionError: If a size is out of range, or ``path`` / ``new_path``
            is not an existing directory.
        OSError: If ``force_placement`` is False and a destination class
            directory already exists.
        ValueError: Propagated from ``train_test_split`` when a class holds
            too few files to be split with the requested fractions.
    """
    assert 0 < test_size < 0.5 and 0 <= valid_size < 0.5, \
        "test_size must be in (0, 0.5) and valid_size in [0, 0.5)"
    assert os.path.exists(path) and os.path.isdir(path)
    assert os.path.exists(new_path) and os.path.isdir(new_path)

    subsets = ('train', 'test', 'valid')

    for dir_ in os.listdir(path):
        dir_path = os.path.join(path, dir_)

        # only directories are classes; stray files at the root are skipped
        if not os.path.isdir(dir_path):
            continue

        targets = {set_: os.path.join(new_path, set_, dir_) for set_ in subsets}

        # fail before any work is done for this class if the caller asked
        # not to reuse destination directories and one is already there
        if not force_placement and any(os.path.exists(target) for target in targets.values()):
            raise OSError(f"One of the training, validation or testing directory for the class {dir_} already exists! Enable the force_placement argument if you want to use already created directories.")

        # keep regular files only (copyfile cannot copy a directory); sorting
        # makes the split independent of the file system's listing order
        images = sorted(
            name for name in os.listdir(dir_path)
            if os.path.isfile(os.path.join(dir_path, name))
        )

        # first cut: training set versus the combined test + valid hold-out
        train_set, test_valid_set = train_test_split(images, test_size=test_size + valid_size)

        if valid_size > 0:
            # valid_size is a fraction of the whole class, so it has to be
            # rescaled to a fraction of the hold-out before the second cut
            test_set, valid_set = train_test_split(
                test_valid_set,
                test_size=valid_size / (test_size + valid_size),
            )
        else:
            # train_test_split rejects a zero fraction: everything held out
            # goes to the test set
            test_set, valid_set = test_valid_set, []

        # exist_ok lets a partially created layout be completed
        for target in targets.values():
            os.makedirs(target, exist_ok=True)

        for set_, names in zip(subsets, (train_set, test_set, valid_set)):
            for image in names:
                shutil.copyfile(os.path.join(dir_path, image), os.path.join(targets[set_], image))

    print(f"All the file in {path} was copied in {new_path} successfully!")