Spaces:
Sleeping
Sleeping
| from datasets import load_dataset, DatasetDict | |
| SEED = 21 | |
| print("Downloading dataset from hugging face...") | |
| # Download the dataset from huggingface | |
| ds = load_dataset("DScomp380/plant_village") | |
| # extract dataset from DatasetDict | |
| ds = ds['train'] | |
| print("Splitting dataset into train/test/validation...") | |
| # First extract the training set | |
| temp = ds.train_test_split(train_size=0.70, shuffle=True, seed=SEED) | |
| # then split remaining dataset for test/validation | |
| test_valid_ds = temp['test'].train_test_split(train_size=0.5, shuffle=True, seed=SEED) | |
| # assign the sub datasets | |
| train_ds = temp['train'] | |
| validation_ds = test_valid_ds['train'] | |
| test_ds = test_valid_ds['test'] | |
| # combine into one DatasetDict | |
| ds_dict = DatasetDict({ | |
| "train": train_ds, | |
| "test": test_ds, | |
| "validation": validation_ds | |
| }) | |
| ds_dict.save_to_disk("data/processed_plant_village") |