"""Build train/validation/test DataLoaders for the PlantVillage dataset.

Importing this module reads the project config, loads the
``DScomp380/plant_village`` dataset, splits its ``train`` split 70/15/15
(seeded), attaches an on-the-fly image transform, and exposes
``train_loader``, ``val_loader`` and ``test_loader``.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision import transforms

from utils.config import load_config

config = load_config()

batch_size = config["batch_size"]
num_workers = config["num_workers"]
mean_nm = config["normalize_mean"]
std_nm = config["normalize_std"]

# Select the first GPU if one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

dataset = load_dataset("DScomp380/plant_village")

# Split the dataset into train (70%) and a remaining 30% for val and test.
# The fixed seed keeps the partition reproducible across runs.
splits = dataset["train"].train_test_split(test_size=0.30, seed=42)
train_split = splits["train"]  # training set
remaining = splits["test"]

# Split the remaining 30% in half: val (15%) and test (15%).
val_test = remaining.train_test_split(test_size=0.5, seed=42)
val_split = val_test["train"]  # validation set
test_split = val_test["test"]  # test set

# Resize images to 224x224, convert to tensor, and normalize with the
# mean/std read from the config.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean_nm, std=std_nm),
])


def transform_batch(batch: dict) -> dict:
    """Apply ``transform`` to every image of a batch.

    Args:
        batch: Mapping of column name to a list of values; must contain an
            ``"image"`` column.

    Returns:
        The same mapping with a ``"pixel_values"`` list added, holding one
        transformed tensor per image.
    """
    # Force three channels first: a grayscale, palette or RGBA image would
    # otherwise yield a tensor whose channel count differs from the others,
    # breaking Normalize and the torch.stack in collate_fn.
    # NOTE(review): assumes the "image" column holds PIL images - confirm.
    batch["pixel_values"] = [
        transform(img.convert("RGB")) for img in batch["image"]
    ]
    return batch


# Apply the transformations lazily to each split.
train_split = train_split.with_transform(transform_batch)
val_split = val_split.with_transform(transform_batch)
test_split = test_split.with_transform(transform_batch)


def collate_fn(batch: list) -> dict:
    """Collate a list of examples into one batch of tensors.

    Args:
        batch: Examples, each providing ``"pixel_values"`` and ``"label"``.

    Returns:
        A dict with ``"pixel_values"`` (the per-example tensors stacked
        along a new first dimension) and ``"labels"`` (a tensor built from
        the per-example labels).
    """
    return {
        "pixel_values": torch.stack([item["pixel_values"] for item in batch]),
        "labels": torch.tensor([item["label"] for item in batch]),
    }


# Create DataLoaders for the train, val, and test sets. Only the training
# loader shuffles.
train_loader = DataLoader(
    train_split,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_split,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_split,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    collate_fn=collate_fn,
)

if __name__ == "__main__":
    print(device)
    print(f"Loaded PlantVillage dataset with splits: {dataset}")